throughput.c (10800B)
1 /* See LICENSE for license details. */ 2 /* TODO(rnp): 3 * [ ]: for finer grained evaluation of throughput latency just queue a data upload 4 * without replacing the data. 5 * [ ]: bug: we aren't inserting rf data between each frame 6 */ 7 8 #define LIB_FN function 9 #include "ogl_beamformer_lib.c" 10 11 #include <signal.h> 12 #include <stdarg.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <zstd.h> 16 17 global u32 g_output_points[4] = {512, 1, 1024, 1}; 18 global v2 g_axial_extent = {{ 10e-3, 165e-3}}; 19 global v2 g_lateral_extent = {{-60e-3, 60e-3}}; 20 global f32 g_f_number = 0.5; 21 22 typedef struct { 23 b32 loop; 24 b32 cuda; 25 u32 frame_number; 26 27 char **remaining; 28 i32 remaining_count; 29 } Options; 30 31 #define ZEMP_BP_MAGIC (uint64_t)0x5042504D455AFECAull 32 typedef struct { 33 u64 magic; 34 u32 version; 35 u16 decode_mode; 36 u16 beamform_mode; 37 u32 raw_data_dim[4]; 38 u32 decoded_data_dim[4]; 39 f32 xdc_element_pitch[2]; 40 f32 xdc_transform[16]; /* NOTE: column major order */ 41 i16 channel_mapping[256]; 42 f32 transmit_angles[256]; 43 f32 focal_depths[256]; 44 i16 sparse_elements[256]; 45 i16 hadamard_rows[256]; 46 f32 speed_of_sound; 47 f32 center_frequency; 48 f32 sampling_frequency; 49 f32 time_offset; 50 u32 transmit_mode; 51 } zemp_bp_v1; 52 53 global b32 g_should_exit; 54 55 #define die(...) die_((char *)__func__, __VA_ARGS__) 56 function no_return void 57 die_(char *function_name, char *format, ...) 58 { 59 if (function_name) 60 fprintf(stderr, "%s: ", function_name); 61 62 va_list ap; 63 64 va_start(ap, format); 65 vfprintf(stderr, format, ap); 66 va_end(ap); 67 68 os_exit(1); 69 } 70 71 #if OS_LINUX 72 73 #include <fcntl.h> 74 #include <sys/stat.h> 75 #include <unistd.h> 76 77 function void os_init_timer(void) { } 78 79 function f64 80 os_get_time(void) 81 { 82 f64 result = (f64)os_get_timer_counter() / os_get_timer_frequency(); 83 return result; 84 } 85 86 function s8 87 os_read_file_simp(char *fname) 88 { 89 s8 result; 90 i32 fd = open(fname, O_RDONLY); 91 if (fd < 0) 92 die("couldn't open file: %s\n", fname); 93 94 struct stat st; 95 if (stat(fname, &st) < 0) 96 die("couldn't stat file\n"); 97 98 result.len = st.st_size; 99 result.data = malloc(st.st_size); 100 if (!result.data) 101 die("couldn't alloc space for reading\n"); 102 103 iz rlen = read(fd, result.data, st.st_size); 104 close(fd); 105 106 if (rlen != st.st_size) 107 die("couldn't read file: %s\n", fname); 108 109 return result; 110 } 111 112 #elif OS_WINDOWS 113 114 global os_w32_context os_context; 115 116 function void 117 os_init_timer(void) 118 { 119 os_context.timer_frequency = os_get_timer_frequency(); 120 } 121 122 function f64 123 os_get_time(void) 124 { 125 f64 result = (f64)os_get_timer_counter() / os_context.timer_frequency; 126 return result; 127 } 128 129 function s8 130 os_read_file_simp(char *fname) 131 { 132 s8 result; 133 iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); 134 if (h == INVALID_FILE) 135 die("couldn't open file: %s\n", fname); 136 137 w32_file_info fileinfo; 138 if (!GetFileInformationByHandle(h, &fileinfo)) 139 die("couldn't get file info\n", stderr); 140 141 result.len = fileinfo.nFileSizeLow; 142 result.data = malloc(fileinfo.nFileSizeLow); 143 if (!result.data) 144 die("couldn't alloc space for reading\n"); 145 146 i32 rlen = 0; 147 if (!ReadFile(h, result.data, fileinfo.nFileSizeLow, &rlen, 0) && rlen != fileinfo.nFileSizeLow) 148 die("couldn't read file: %s\n", fname); 149 CloseHandle(h); 150 151 return result; 152 } 153 154 #else 155 #error Unsupported Platform 156 #endif 157 158 function void 159 stream_ensure_termination(Stream *s, u8 byte) 160 { 161 b32 found = 0; 162 if (!s->errors && s->widx > 0) 163 found = s->data[s->widx - 1] == byte; 164 if (!found) { 165 s->errors |= s->cap - 1 < s->widx; 166 if (!s->errors) 167 s->data[s->widx++] = byte; 168 } 169 } 170 171 function void 172 stream_append_u64_width(Stream *s, u64 n, u64 min_width) 173 { 174 u8 tmp[64]; 175 u8 *end = tmp + sizeof(tmp); 176 u8 *beg = end; 177 min_width = MIN(sizeof(tmp), min_width); 178 179 do { *--beg = '0' + (n % 10); } while (n /= 10); 180 while (end - beg > 0 && end - beg < min_width) 181 *--beg = '0'; 182 183 stream_append(s, beg, end - beg); 184 } 185 186 function void * 187 decompress_zstd_data(s8 raw) 188 { 189 iz requested_size = ZSTD_getFrameContentSize(raw.data, raw.len); 190 void *out = malloc(requested_size); 191 if (out) { 192 iz decompressed = ZSTD_decompress(out, requested_size, raw.data, raw.len); 193 if (decompressed != requested_size) { 194 free(out); 195 out = 0; 196 } 197 } 198 return out; 199 } 200 201 function zemp_bp_v1 * 202 read_zemp_bp_v1(u8 *path) 203 { 204 s8 raw = os_read_file_simp((char *)path); 205 zemp_bp_v1 *result = 0; 206 if (raw.len == sizeof(zemp_bp_v1) && *(u64 *)raw.data == ZEMP_BP_MAGIC) { 207 if (((zemp_bp_v1 *)raw.data)->version == 1) 208 result = (zemp_bp_v1 *)raw.data; 209 } 210 return result; 211 } 212 213 function void 214 fill_beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out) 215 { 216 mem_copy(out->xdc_transform, zbp->xdc_transform, sizeof(out->xdc_transform)); 217 mem_copy(out->dec_data_dim, zbp->decoded_data_dim, sizeof(out->dec_data_dim)); 218 mem_copy(out->xdc_element_pitch, zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch)); 219 mem_copy(out->rf_raw_dim, zbp->raw_data_dim, sizeof(out->rf_raw_dim)); 220 221 out->transmit_mode = zbp->transmit_mode; 222 out->decode = zbp->decode_mode; 223 out->das_shader_id = zbp->beamform_mode; 224 out->time_offset = zbp->time_offset; 225 out->sampling_frequency = zbp->sampling_frequency; 226 out->center_frequency = zbp->center_frequency; 227 out->speed_of_sound = zbp->speed_of_sound; 228 } 229 230 #define shift_n(v, c, n) v += n, c -= n 231 #define shift(v, c) shift_n(v, c, 1) 232 233 function void 234 usage(char *argv0) 235 { 236 die("%s [--loop] [--cuda] [--frame n] base_path study\n" 237 " --loop: reupload data forever\n" 238 " --cuda: use cuda for decoding\n" 239 " --frame n: use frame n of the data for display\n", 240 argv0); 241 } 242 243 function b32 244 s8_equal(s8 a, s8 b) 245 { 246 b32 result = a.len == b.len; 247 for (iz i = 0; result && i < a.len; i++) 248 result &= a.data[i] == b.data[i]; 249 return result; 250 } 251 252 function Options 253 parse_argv(i32 argc, char *argv[]) 254 { 255 Options result = {0}; 256 257 char *argv0 = argv[0]; 258 shift(argv, argc); 259 260 while (argc > 0) { 261 s8 arg = c_str_to_s8(*argv); 262 263 if (s8_equal(arg, s8("--loop"))) { 264 shift(argv, argc); 265 result.loop = 1; 266 } else if (s8_equal(arg, s8("--cuda"))) { 267 shift(argv, argc); 268 result.cuda = 1; 269 } else if (s8_equal(arg, s8("--frame"))) { 270 shift(argv, argc); 271 if (argc) { 272 result.frame_number = atoi(*argv); 273 shift(argv, argc); 274 } 275 } else if (arg.len > 0 && arg.data[0] == '-') { 276 usage(argv0); 277 } else { 278 break; 279 } 280 } 281 282 result.remaining = argv; 283 result.remaining_count = argc; 284 285 return result; 286 } 287 288 function i16 * 289 decompress_data_at_work_index(Stream *path_base, u32 index) 290 { 291 stream_append_byte(path_base, '_'); 292 stream_append_u64_width(path_base, index, 2); 293 stream_append_s8(path_base, s8(".zst")); 294 stream_ensure_termination(path_base, 0); 295 296 s8 compressed_data = os_read_file_simp((char *)path_base->data); 297 i16 *result = decompress_zstd_data(compressed_data); 298 if (!result) 299 die("failed to decompress data: %s\n", path_base->data); 300 free(compressed_data.data); 301 302 return result; 303 } 304 305 function b32 306 send_frame(i16 *restrict i16_data, BeamformerParameters *restrict bp) 307 { 308 b32 result = 0; 309 u32 data_size = bp->rf_raw_dim[0] * bp->rf_raw_dim[1] * sizeof(i16); 310 311 if (beamformer_push_data_with_compute(i16_data, data_size, IPT_XZ, 100)) 312 //if (beamformer_push_data(i16_data, data_size, 100)) 313 result = beamformer_start_compute(-1); 314 if (!result) printf("lib error: %s\n", beamformer_get_last_error_string()); 315 316 return result; 317 } 318 319 function void 320 execute_study(s8 study, Arena arena, Stream path, Options *options) 321 { 322 fprintf(stderr, "showing: %.*s\n", (i32)study.len, study.data); 323 324 stream_append_s8(&path, study); 325 stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR); 326 stream_append_s8(&path, study); 327 iz path_work_index = path.widx; 328 329 stream_append_s8(&path, s8(".bp")); 330 stream_ensure_termination(&path, 0); 331 332 zemp_bp_v1 *zbp = read_zemp_bp_v1(path.data); 333 if (!zbp) die("failed to unpack parameters file\n"); 334 335 BeamformerParameters bp = {0}; 336 fill_beamformer_parameters_from_zemp_bp_v1(zbp, &bp); 337 338 mem_copy(bp.output_points, g_output_points, sizeof(bp.output_points)); 339 bp.output_points[3] = 1; 340 341 bp.output_min_coordinate[0] = g_lateral_extent.x; 342 bp.output_min_coordinate[1] = 0; 343 bp.output_min_coordinate[2] = g_axial_extent.x; 344 bp.output_min_coordinate[3] = 0; 345 346 bp.output_max_coordinate[0] = g_lateral_extent.y; 347 bp.output_max_coordinate[1] = 0; 348 bp.output_max_coordinate[2] = g_axial_extent.y; 349 bp.output_max_coordinate[3] = 0; 350 351 bp.f_number = g_f_number; 352 bp.beamform_plane = 0; 353 bp.interpolate = 0; 354 355 if (zbp->sparse_elements[0] == -1) { 356 for (u32 i = 0; i < countof(zbp->sparse_elements); i++) 357 zbp->sparse_elements[i] = i; 358 } 359 360 { 361 align_as(64) v2 focal_vectors[countof(zbp->focal_depths)]; 362 for (u32 i = 0; i < countof(zbp->focal_depths); i++) 363 focal_vectors[i] = (v2){{zbp->transmit_angles[i], zbp->focal_depths[i]}}; 364 beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors), 0); 365 } 366 367 beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping), 0); 368 beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements), 0); 369 beamformer_push_parameters(&bp, 0); 370 371 free(zbp); 372 373 i32 shader_stages[16]; 374 i32 shader_stage_count = 0; 375 if (options->cuda) shader_stages[shader_stage_count++] = BeamformerShaderKind_CudaDecode; 376 else shader_stages[shader_stage_count++] = BeamformerShaderKind_Decode; 377 shader_stages[shader_stage_count++] = BeamformerShaderKind_DASCompute; 378 379 set_beamformer_pipeline(shader_stages, shader_stage_count); 380 381 stream_reset(&path, path_work_index); 382 i16 *data = decompress_data_at_work_index(&path, options->frame_number); 383 384 if (options->loop) { 385 u32 frame = 0; 386 f32 times[32] = {0}; 387 f32 data_size = bp.rf_raw_dim[0] * bp.rf_raw_dim[1] * sizeof(*data); 388 f64 start = os_get_time(); 389 for (;!g_should_exit;) { 390 if (send_frame(data, &bp)) { 391 f64 now = os_get_time(); 392 f32 delta = now - start; 393 start = now; 394 395 if ((frame % 16) == 0) { 396 f32 sum = 0; 397 for (u32 i = 0; i < countof(times); i++) 398 sum += times[i] / countof(times); 399 printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n", 400 delta * 1e3, sum * 1e3, data_size / (sum * (GB(1)))); 401 } 402 403 times[frame & 31] = delta; 404 frame++; 405 } 406 } 407 } else { 408 send_frame(data, &bp); 409 } 410 411 free(data); 412 } 413 414 function void 415 sigint(i32 _signo) 416 { 417 g_should_exit = 1; 418 } 419 420 extern i32 421 main(i32 argc, char *argv[]) 422 { 423 Options options = parse_argv(argc, argv); 424 425 if (!BETWEEN(options.remaining_count, 1, 2)) 426 usage(argv[0]); 427 428 os_init_timer(); 429 430 signal(SIGINT, sigint); 431 432 Arena arena = os_alloc_arena(KB(8)); 433 Stream path = stream_alloc(&arena, KB(4)); 434 stream_append_s8(&path, c_str_to_s8(options.remaining[0])); 435 stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR); 436 437 execute_study(c_str_to_s8(options.remaining[1]), arena, path, &options); 438 439 return 0; 440 }