throughput.c - ogl_beamforming - Ultrasound Beamforming Implemented with OpenGL

throughput.c (10524B)
      1 /* See LICENSE for license details. */
      2 /* TODO(rnp):
      3  * [ ]: for finer grained evaluation of throughput latency just queue a data upload
      4  *      without replacing the data.
      5  * [ ]: bug: we aren't inserting rf data between each frame
      6  */
      7 
      8 #define LIB_FN function
      9 #include "ogl_beamformer_lib.c"
     10 
     11 #include <signal.h>
     12 #include <stdarg.h>
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <zstd.h>
     16 
     17 global i32 g_output_points[4] = {512, 1, 1024, 1};
     18 global v2  g_axial_extent     = {{ 10e-3f, 165e-3f}};
     19 global v2  g_lateral_extent   = {{-60e-3f,  60e-3f}};
     20 global f32 g_f_number         = 0.5f;
     21 
     22 typedef struct {
     23 	b32 loop;
     24 	b32 cuda;
     25 	u32 frame_number;
     26 
     27 	char **remaining;
     28 	i32    remaining_count;
     29 } Options;
     30 
     31 #define ZEMP_BP_MAGIC (uint64_t)0x5042504D455AFECAull
     32 typedef struct {
     33 	u64 magic;
     34 	u32 version;
     35 	u16 decode_mode;
     36 	u16 beamform_mode;
     37 	u32 raw_data_dim[4];
     38 	u32 decoded_data_dim[4];
     39 	f32 xdc_element_pitch[2];
     40 	f32 xdc_transform[16]; /* NOTE: column major order */
     41 	i16 channel_mapping[256];
     42 	f32 transmit_angles[256];
     43 	f32 focal_depths[256];
     44 	i16 sparse_elements[256];
     45 	i16 hadamard_rows[256];
     46 	f32 speed_of_sound;
     47 	f32 center_frequency;
     48 	f32 sampling_frequency;
     49 	f32 time_offset;
     50 	u32 transmit_mode;
     51 } zemp_bp_v1;
     52 
     53 global b32 g_should_exit;
     54 
     55 #define die(...) die_((char *)__func__, __VA_ARGS__)
     56 function no_return void
     57 die_(char *function_name, char *format, ...)
     58 {
     59 	if (function_name)
     60 		fprintf(stderr, "%s: ", function_name);
     61 
     62 	va_list ap;
     63 
     64 	va_start(ap, format);
     65 	vfprintf(stderr, format, ap);
     66 	va_end(ap);
     67 
     68 	os_exit(1);
     69 }
     70 
     71 #if OS_LINUX
     72 
     73 #include <fcntl.h>
     74 #include <sys/stat.h>
     75 #include <unistd.h>
     76 
     77 function void os_init_timer(void) { }
     78 
     79 function f64
     80 os_get_time(void)
     81 {
     82 	f64 result = (f64)os_get_timer_counter() / (f64)os_get_timer_frequency();
     83 	return result;
     84 }
     85 
     86 function s8
     87 os_read_file_simp(char *fname)
     88 {
     89 	s8 result;
     90 	i32 fd = open(fname, O_RDONLY);
     91 	if (fd < 0)
     92 		die("couldn't open file: %s\n", fname);
     93 
     94 	struct stat st;
     95 	if (stat(fname, &st) < 0)
     96 		die("couldn't stat file\n");
     97 
     98 	result.len  = st.st_size;
     99 	result.data = malloc((uz)st.st_size);
    100 	if (!result.data)
    101 		die("couldn't alloc space for reading\n");
    102 
    103 	iz rlen = read(fd, result.data, (u32)st.st_size);
    104 	close(fd);
    105 
    106 	if (rlen != st.st_size)
    107 		die("couldn't read file: %s\n", fname);
    108 
    109 	return result;
    110 }
    111 
    112 #elif OS_WINDOWS
    113 
    114 global w32_context os_context;
    115 
    116 function void
    117 os_init_timer(void)
    118 {
    119 	os_context.timer_frequency = os_get_timer_frequency();
    120 }
    121 
    122 function f64
    123 os_get_time(void)
    124 {
    125 	f64 result = (f64)os_get_timer_counter() / (f64)os_context.timer_frequency;
    126 	return result;
    127 }
    128 
    129 function s8
    130 os_read_file_simp(char *fname)
    131 {
    132 	s8 result;
    133 	iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0);
    134 	if (h == INVALID_FILE)
    135 		die("couldn't open file: %s\n", fname);
    136 
    137 	w32_file_info fileinfo;
    138 	if (!GetFileInformationByHandle(h, &fileinfo))
    139 		die("couldn't get file info\n", stderr);
    140 
    141 	result.len  = fileinfo.nFileSizeLow;
    142 	result.data = malloc(fileinfo.nFileSizeLow);
    143 	if (!result.data)
    144 		die("couldn't alloc space for reading\n");
    145 
    146 	i32 rlen = 0;
    147 	if (!ReadFile(h, result.data, (i32)fileinfo.nFileSizeLow, &rlen, 0) && rlen != (i32)fileinfo.nFileSizeLow)
    148 		die("couldn't read file: %s\n", fname);
    149 	CloseHandle(h);
    150 
    151 	return result;
    152 }
    153 
    154 #else
    155 #error Unsupported Platform
    156 #endif
    157 
    158 function void
    159 stream_ensure_termination(Stream *s, u8 byte)
    160 {
    161 	b32 found = 0;
    162 	if (!s->errors && s->widx > 0)
    163 		found = s->data[s->widx - 1] == byte;
    164 	if (!found) {
    165 		s->errors |= s->cap - 1 < s->widx;
    166 		if (!s->errors)
    167 			s->data[s->widx++] = byte;
    168 	}
    169 }
    170 
    171 function void *
    172 decompress_zstd_data(s8 raw)
    173 {
    174 	uz requested_size = ZSTD_getFrameContentSize(raw.data, (uz)raw.len);
    175 	void *out         = malloc(requested_size);
    176 	if (out) {
    177 		uz decompressed = ZSTD_decompress(out, requested_size, raw.data, (uz)raw.len);
    178 		if (decompressed != requested_size) {
    179 			free(out);
    180 			out = 0;
    181 		}
    182 	}
    183 	return out;
    184 }
    185 
    186 function zemp_bp_v1 *
    187 read_zemp_bp_v1(u8 *path)
    188 {
    189 	s8 raw = os_read_file_simp((char *)path);
    190 	zemp_bp_v1 *result = 0;
    191 	if (raw.len == sizeof(zemp_bp_v1) && *(u64 *)raw.data == ZEMP_BP_MAGIC) {
    192 		if (((zemp_bp_v1 *)raw.data)->version == 1)
    193 			result = (zemp_bp_v1 *)raw.data;
    194 	}
    195 	return result;
    196 }
    197 
    198 function void
    199 fill_beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out)
    200 {
    201 	mem_copy(out->xdc_transform,     zbp->xdc_transform,     sizeof(out->xdc_transform));
    202 	mem_copy(out->dec_data_dim,      zbp->decoded_data_dim,  sizeof(out->dec_data_dim));
    203 	mem_copy(out->xdc_element_pitch, zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch));
    204 	mem_copy(out->rf_raw_dim,        zbp->raw_data_dim,      sizeof(out->rf_raw_dim));
    205 
    206 	out->transmit_mode      = (i32)zbp->transmit_mode;
    207 	out->decode             = zbp->decode_mode;
    208 	out->das_shader_id      = zbp->beamform_mode;
    209 	out->time_offset        = zbp->time_offset;
    210 	out->sampling_frequency = zbp->sampling_frequency;
    211 	out->center_frequency   = zbp->center_frequency;
    212 	out->speed_of_sound     = zbp->speed_of_sound;
    213 }
    214 
    215 #define shift_n(v, c, n) v += n, c -= n
    216 #define shift(v, c) shift_n(v, c, 1)
    217 
    218 function void
    219 usage(char *argv0)
    220 {
    221 	die("%s [--loop] [--cuda] [--frame n] base_path study\n"
    222 	    "    --loop:    reupload data forever\n"
    223 	    "    --cuda:    use cuda for decoding\n"
    224 	    "    --frame n: use frame n of the data for display\n",
    225 	    argv0);
    226 }
    227 
    228 function b32
    229 s8_equal(s8 a, s8 b)
    230 {
    231 	b32 result = a.len == b.len;
    232 	for (iz i = 0; result && i < a.len; i++)
    233 		result &= a.data[i] == b.data[i];
    234 	return result;
    235 }
    236 
    237 function Options
    238 parse_argv(i32 argc, char *argv[])
    239 {
    240 	Options result = {0};
    241 
    242 	char *argv0 = argv[0];
    243 	shift(argv, argc);
    244 
    245 	while (argc > 0) {
    246 		s8 arg = c_str_to_s8(*argv);
    247 
    248 		if (s8_equal(arg, s8("--loop"))) {
    249 			shift(argv, argc);
    250 			result.loop = 1;
    251 		} else if (s8_equal(arg, s8("--cuda"))) {
    252 			shift(argv, argc);
    253 			result.cuda = 1;
    254 		} else if (s8_equal(arg, s8("--frame"))) {
    255 			shift(argv, argc);
    256 			if (argc) {
    257 				result.frame_number = (u32)atoi(*argv);
    258 				shift(argv, argc);
    259 			}
    260 		} else if (arg.len > 0 && arg.data[0] == '-') {
    261 			usage(argv0);
    262 		} else {
    263 			break;
    264 		}
    265 	}
    266 
    267 	result.remaining       = argv;
    268 	result.remaining_count = argc;
    269 
    270 	return result;
    271 }
    272 
    273 function i16 *
    274 decompress_data_at_work_index(Stream *path_base, u32 index)
    275 {
    276 	stream_append_byte(path_base, '_');
    277 	stream_append_u64_width(path_base, index, 2);
    278 	stream_append_s8(path_base, s8(".zst"));
    279 	stream_ensure_termination(path_base, 0);
    280 
    281 	s8 compressed_data = os_read_file_simp((char *)path_base->data);
    282 	i16 *result = decompress_zstd_data(compressed_data);
    283 	if (!result)
    284 		die("failed to decompress data: %s\n", path_base->data);
    285 	free(compressed_data.data);
    286 
    287 	return result;
    288 }
    289 
    290 function b32
    291 send_frame(i16 *restrict i16_data, BeamformerParameters *restrict bp)
    292 {
    293 	b32 result    = 0;
    294 	u32 data_size = bp->rf_raw_dim[0] * bp->rf_raw_dim[1] * sizeof(i16);
    295 	if (beamformer_wait_for_compute_dispatch(10000))
    296 		result = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 100);
    297 	if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string());
    298 
    299 	return result;
    300 }
    301 
    302 function void
    303 execute_study(s8 study, Arena arena, Stream path, Options *options)
    304 {
    305 	fprintf(stderr, "showing: %.*s\n", (i32)study.len, study.data);
    306 
    307 	stream_append_s8(&path, study);
    308 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    309 	stream_append_s8(&path, study);
    310 	i32 path_work_index = path.widx;
    311 
    312 	stream_append_s8(&path, s8(".bp"));
    313 	stream_ensure_termination(&path, 0);
    314 
    315 	zemp_bp_v1 *zbp = read_zemp_bp_v1(path.data);
    316 	if (!zbp) die("failed to unpack parameters file\n");
    317 
    318 	BeamformerParameters bp = {0};
    319 	fill_beamformer_parameters_from_zemp_bp_v1(zbp, &bp);
    320 
    321 	mem_copy(bp.output_points, g_output_points, sizeof(bp.output_points));
    322 	bp.output_points[3] = 1;
    323 
    324 	bp.output_min_coordinate[0] = g_lateral_extent.x;
    325 	bp.output_min_coordinate[1] = 0;
    326 	bp.output_min_coordinate[2] = g_axial_extent.x;
    327 	bp.output_min_coordinate[3] = 0;
    328 
    329 	bp.output_max_coordinate[0] = g_lateral_extent.y;
    330 	bp.output_max_coordinate[1] = 0;
    331 	bp.output_max_coordinate[2] = g_axial_extent.y;
    332 	bp.output_max_coordinate[3] = 0;
    333 
    334 	bp.f_number       = g_f_number;
    335 	bp.beamform_plane = 0;
    336 	bp.interpolate    = 0;
    337 
    338 	if (zbp->sparse_elements[0] == -1) {
    339 		for (i16 i = 0; i < countof(zbp->sparse_elements); i++)
    340 			zbp->sparse_elements[i] = i;
    341 	}
    342 
    343 	{
    344 		align_as(64) v2 focal_vectors[countof(zbp->focal_depths)];
    345 		for (u32 i = 0; i < countof(zbp->focal_depths); i++)
    346 			focal_vectors[i] = (v2){{zbp->transmit_angles[i], zbp->focal_depths[i]}};
    347 		beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors), 0);
    348 	}
    349 
    350 	beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping), 0);
    351 	beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements), 0);
    352 	beamformer_push_parameters(&bp, 0);
    353 
    354 	free(zbp);
    355 
    356 	i32 shader_stages[16];
    357 	u32 shader_stage_count = 0;
    358 	if (options->cuda) shader_stages[shader_stage_count++] = BeamformerShaderKind_CudaDecode;
    359 	else               shader_stages[shader_stage_count++] = BeamformerShaderKind_Decode;
    360 	shader_stages[shader_stage_count++] = BeamformerShaderKind_DAS;
    361 
    362 	set_beamformer_pipeline(shader_stages, shader_stage_count);
    363 
    364 	stream_reset(&path, path_work_index);
    365 	i16 *data = decompress_data_at_work_index(&path, options->frame_number);
    366 
    367 	if (options->loop) {
    368 		u32 frame = 0;
    369 		f32 times[32] = {0};
    370 		f32 data_size = (f32)(bp.rf_raw_dim[0] * bp.rf_raw_dim[1] * sizeof(*data));
    371 		f64 start = os_get_time();
    372 		for (;!g_should_exit;) {
    373 			if (send_frame(data, &bp)) {
    374 				f64 now   = os_get_time();
    375 				f32 delta = (f32)(now - start);
    376 				start = now;
    377 
    378 				if ((frame % 16) == 0) {
    379 					f32 sum = 0;
    380 					for (u32 i = 0; i < countof(times); i++)
    381 						sum += times[i] / countof(times);
    382 					printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n",
    383 					       delta * 1e3, sum * 1e3, data_size / (sum * (GB(1))));
    384 				}
    385 
    386 				times[frame & 31] = delta;
    387 				frame++;
    388 			}
    389 		}
    390 	} else {
    391 		send_frame(data, &bp);
    392 	}
    393 
    394 	free(data);
    395 }
    396 
    397 function void
    398 sigint(i32 _signo)
    399 {
    400 	g_should_exit = 1;
    401 }
    402 
    403 extern i32
    404 main(i32 argc, char *argv[])
    405 {
    406 	Options options = parse_argv(argc, argv);
    407 
    408 	if (!BETWEEN(options.remaining_count, 1, 2))
    409 		usage(argv[0]);
    410 
    411 	os_init_timer();
    412 
    413 	signal(SIGINT, sigint);
    414 
    415 	Arena arena = os_alloc_arena(KB(8));
    416 	Stream path = stream_alloc(&arena, KB(4));
    417 	stream_append_s8(&path, c_str_to_s8(options.remaining[0]));
    418 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    419 
    420 	execute_study(c_str_to_s8(options.remaining[1]), arena, path, &options);
    421 
    422 	return 0;
    423 }