ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

throughput.c (10800B)


      1 /* See LICENSE for license details. */
      2 /* TODO(rnp):
      3  * [ ]: for finer grained evaluation of throughput latency just queue a data upload
      4  *      without replacing the data.
      5  * [ ]: bug: we aren't inserting rf data between each frame
      6  */
      7 
      8 #define LIB_FN function
      9 #include "ogl_beamformer_lib.c"
     10 
     11 #include <signal.h>
     12 #include <stdarg.h>
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <zstd.h>
     16 
     17 global u32 g_output_points[4] = {512, 1, 1024, 1};
     18 global v2  g_axial_extent     = {{ 10e-3, 165e-3}};
     19 global v2  g_lateral_extent   = {{-60e-3,  60e-3}};
     20 global f32 g_f_number         = 0.5;
     21 
     22 typedef struct {
     23 	b32 loop;
     24 	b32 cuda;
     25 	u32 frame_number;
     26 
     27 	char **remaining;
     28 	i32    remaining_count;
     29 } Options;
     30 
     31 #define ZEMP_BP_MAGIC (uint64_t)0x5042504D455AFECAull
     32 typedef struct {
     33 	u64 magic;
     34 	u32 version;
     35 	u16 decode_mode;
     36 	u16 beamform_mode;
     37 	u32 raw_data_dim[4];
     38 	u32 decoded_data_dim[4];
     39 	f32 xdc_element_pitch[2];
     40 	f32 xdc_transform[16]; /* NOTE: column major order */
     41 	i16 channel_mapping[256];
     42 	f32 transmit_angles[256];
     43 	f32 focal_depths[256];
     44 	i16 sparse_elements[256];
     45 	i16 hadamard_rows[256];
     46 	f32 speed_of_sound;
     47 	f32 center_frequency;
     48 	f32 sampling_frequency;
     49 	f32 time_offset;
     50 	u32 transmit_mode;
     51 } zemp_bp_v1;
     52 
     53 global b32 g_should_exit;
     54 
     55 #define die(...) die_((char *)__func__, __VA_ARGS__)
     56 function no_return void
     57 die_(char *function_name, char *format, ...)
     58 {
     59 	if (function_name)
     60 		fprintf(stderr, "%s: ", function_name);
     61 
     62 	va_list ap;
     63 
     64 	va_start(ap, format);
     65 	vfprintf(stderr, format, ap);
     66 	va_end(ap);
     67 
     68 	os_exit(1);
     69 }
     70 
     71 #if OS_LINUX
     72 
     73 #include <fcntl.h>
     74 #include <sys/stat.h>
     75 #include <unistd.h>
     76 
     77 function void os_init_timer(void) { }
     78 
     79 function f64
     80 os_get_time(void)
     81 {
     82 	f64 result = (f64)os_get_timer_counter() / os_get_timer_frequency();
     83 	return result;
     84 }
     85 
     86 function s8
     87 os_read_file_simp(char *fname)
     88 {
     89 	s8 result;
     90 	i32 fd = open(fname, O_RDONLY);
     91 	if (fd < 0)
     92 		die("couldn't open file: %s\n", fname);
     93 
     94 	struct stat st;
     95 	if (stat(fname, &st) < 0)
     96 		die("couldn't stat file\n");
     97 
     98 	result.len  = st.st_size;
     99 	result.data = malloc(st.st_size);
    100 	if (!result.data)
    101 		die("couldn't alloc space for reading\n");
    102 
    103 	iz rlen = read(fd, result.data, st.st_size);
    104 	close(fd);
    105 
    106 	if (rlen != st.st_size)
    107 		die("couldn't read file: %s\n", fname);
    108 
    109 	return result;
    110 }
    111 
    112 #elif OS_WINDOWS
    113 
    114 global os_w32_context os_context;
    115 
    116 function void
    117 os_init_timer(void)
    118 {
    119 	os_context.timer_frequency = os_get_timer_frequency();
    120 }
    121 
    122 function f64
    123 os_get_time(void)
    124 {
    125 	f64 result = (f64)os_get_timer_counter() / os_context.timer_frequency;
    126 	return result;
    127 }
    128 
    129 function s8
    130 os_read_file_simp(char *fname)
    131 {
    132 	s8 result;
    133 	iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0);
    134 	if (h == INVALID_FILE)
    135 		die("couldn't open file: %s\n", fname);
    136 
    137 	w32_file_info fileinfo;
    138 	if (!GetFileInformationByHandle(h, &fileinfo))
    139 		die("couldn't get file info\n", stderr);
    140 
    141 	result.len  = fileinfo.nFileSizeLow;
    142 	result.data = malloc(fileinfo.nFileSizeLow);
    143 	if (!result.data)
    144 		die("couldn't alloc space for reading\n");
    145 
    146 	i32 rlen = 0;
    147 	if (!ReadFile(h, result.data, fileinfo.nFileSizeLow, &rlen, 0) && rlen != fileinfo.nFileSizeLow)
    148 		die("couldn't read file: %s\n", fname);
    149 	CloseHandle(h);
    150 
    151 	return result;
    152 }
    153 
    154 #else
    155 #error Unsupported Platform
    156 #endif
    157 
    158 function void
    159 stream_ensure_termination(Stream *s, u8 byte)
    160 {
    161 	b32 found = 0;
    162 	if (!s->errors && s->widx > 0)
    163 		found = s->data[s->widx - 1] == byte;
    164 	if (!found) {
    165 		s->errors |= s->cap - 1 < s->widx;
    166 		if (!s->errors)
    167 			s->data[s->widx++] = byte;
    168 	}
    169 }
    170 
    171 function void
    172 stream_append_u64_width(Stream *s, u64 n, u64 min_width)
    173 {
    174 	u8 tmp[64];
    175 	u8 *end = tmp + sizeof(tmp);
    176 	u8 *beg = end;
    177 	min_width = MIN(sizeof(tmp), min_width);
    178 
    179 	do { *--beg = '0' + (n % 10); } while (n /= 10);
    180 	while (end - beg > 0 &&  end - beg < min_width)
    181 		*--beg = '0';
    182 
    183 	stream_append(s, beg, end - beg);
    184 }
    185 
    186 function void *
    187 decompress_zstd_data(s8 raw)
    188 {
    189 	iz requested_size = ZSTD_getFrameContentSize(raw.data, raw.len);
    190 	void *out         = malloc(requested_size);
    191 	if (out) {
    192 		iz decompressed = ZSTD_decompress(out, requested_size, raw.data, raw.len);
    193 		if (decompressed != requested_size) {
    194 			free(out);
    195 			out = 0;
    196 		}
    197 	}
    198 	return out;
    199 }
    200 
    201 function zemp_bp_v1 *
    202 read_zemp_bp_v1(u8 *path)
    203 {
    204 	s8 raw = os_read_file_simp((char *)path);
    205 	zemp_bp_v1 *result = 0;
    206 	if (raw.len == sizeof(zemp_bp_v1) && *(u64 *)raw.data == ZEMP_BP_MAGIC) {
    207 		if (((zemp_bp_v1 *)raw.data)->version == 1)
    208 			result = (zemp_bp_v1 *)raw.data;
    209 	}
    210 	return result;
    211 }
    212 
    213 function void
    214 fill_beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out)
    215 {
    216 	mem_copy(out->xdc_transform,     zbp->xdc_transform,     sizeof(out->xdc_transform));
    217 	mem_copy(out->dec_data_dim,      zbp->decoded_data_dim,  sizeof(out->dec_data_dim));
    218 	mem_copy(out->xdc_element_pitch, zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch));
    219 	mem_copy(out->rf_raw_dim,        zbp->raw_data_dim,      sizeof(out->rf_raw_dim));
    220 
    221 	out->transmit_mode      = zbp->transmit_mode;
    222 	out->decode             = zbp->decode_mode;
    223 	out->das_shader_id      = zbp->beamform_mode;
    224 	out->time_offset        = zbp->time_offset;
    225 	out->sampling_frequency = zbp->sampling_frequency;
    226 	out->center_frequency   = zbp->center_frequency;
    227 	out->speed_of_sound     = zbp->speed_of_sound;
    228 }
    229 
    230 #define shift_n(v, c, n) v += n, c -= n
    231 #define shift(v, c) shift_n(v, c, 1)
    232 
    233 function void
    234 usage(char *argv0)
    235 {
    236 	die("%s [--loop] [--cuda] [--frame n] base_path study\n"
    237 	    "    --loop:    reupload data forever\n"
    238 	    "    --cuda:    use cuda for decoding\n"
    239 	    "    --frame n: use frame n of the data for display\n",
    240 	    argv0);
    241 }
    242 
    243 function b32
    244 s8_equal(s8 a, s8 b)
    245 {
    246 	b32 result = a.len == b.len;
    247 	for (iz i = 0; result && i < a.len; i++)
    248 		result &= a.data[i] == b.data[i];
    249 	return result;
    250 }
    251 
    252 function Options
    253 parse_argv(i32 argc, char *argv[])
    254 {
    255 	Options result = {0};
    256 
    257 	char *argv0 = argv[0];
    258 	shift(argv, argc);
    259 
    260 	while (argc > 0) {
    261 		s8 arg = c_str_to_s8(*argv);
    262 
    263 		if (s8_equal(arg, s8("--loop"))) {
    264 			shift(argv, argc);
    265 			result.loop = 1;
    266 		} else if (s8_equal(arg, s8("--cuda"))) {
    267 			shift(argv, argc);
    268 			result.cuda = 1;
    269 		} else if (s8_equal(arg, s8("--frame"))) {
    270 			shift(argv, argc);
    271 			if (argc) {
    272 				result.frame_number = atoi(*argv);
    273 				shift(argv, argc);
    274 			}
    275 		} else if (arg.len > 0 && arg.data[0] == '-') {
    276 			usage(argv0);
    277 		} else {
    278 			break;
    279 		}
    280 	}
    281 
    282 	result.remaining       = argv;
    283 	result.remaining_count = argc;
    284 
    285 	return result;
    286 }
    287 
    288 function i16 *
    289 decompress_data_at_work_index(Stream *path_base, u32 index)
    290 {
    291 	stream_append_byte(path_base, '_');
    292 	stream_append_u64_width(path_base, index, 2);
    293 	stream_append_s8(path_base, s8(".zst"));
    294 	stream_ensure_termination(path_base, 0);
    295 
    296 	s8 compressed_data = os_read_file_simp((char *)path_base->data);
    297 	i16 *result = decompress_zstd_data(compressed_data);
    298 	if (!result)
    299 		die("failed to decompress data: %s\n", path_base->data);
    300 	free(compressed_data.data);
    301 
    302 	return result;
    303 }
    304 
    305 function b32
    306 send_frame(i16 *restrict i16_data, BeamformerParameters *restrict bp)
    307 {
    308 	b32 result    = 0;
    309 	u32 data_size = bp->rf_raw_dim[0] * bp->rf_raw_dim[1] * sizeof(i16);
    310 
    311 	if (beamformer_push_data_with_compute(i16_data, data_size, IPT_XZ, 100))
    312 	//if (beamformer_push_data(i16_data, data_size, 100))
    313 		result = beamformer_start_compute(-1);
    314 	if (!result) printf("lib error: %s\n", beamformer_get_last_error_string());
    315 
    316 	return result;
    317 }
    318 
    319 function void
    320 execute_study(s8 study, Arena arena, Stream path, Options *options)
    321 {
    322 	fprintf(stderr, "showing: %.*s\n", (i32)study.len, study.data);
    323 
    324 	stream_append_s8(&path, study);
    325 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    326 	stream_append_s8(&path, study);
    327 	iz path_work_index = path.widx;
    328 
    329 	stream_append_s8(&path, s8(".bp"));
    330 	stream_ensure_termination(&path, 0);
    331 
    332 	zemp_bp_v1 *zbp = read_zemp_bp_v1(path.data);
    333 	if (!zbp) die("failed to unpack parameters file\n");
    334 
    335 	BeamformerParameters bp = {0};
    336 	fill_beamformer_parameters_from_zemp_bp_v1(zbp, &bp);
    337 
    338 	mem_copy(bp.output_points, g_output_points, sizeof(bp.output_points));
    339 	bp.output_points[3] = 1;
    340 
    341 	bp.output_min_coordinate[0] = g_lateral_extent.x;
    342 	bp.output_min_coordinate[1] = 0;
    343 	bp.output_min_coordinate[2] = g_axial_extent.x;
    344 	bp.output_min_coordinate[3] = 0;
    345 
    346 	bp.output_max_coordinate[0] = g_lateral_extent.y;
    347 	bp.output_max_coordinate[1] = 0;
    348 	bp.output_max_coordinate[2] = g_axial_extent.y;
    349 	bp.output_max_coordinate[3] = 0;
    350 
    351 	bp.f_number       = g_f_number;
    352 	bp.beamform_plane = 0;
    353 	bp.interpolate    = 0;
    354 
    355 	if (zbp->sparse_elements[0] == -1) {
    356 		for (u32 i = 0; i < countof(zbp->sparse_elements); i++)
    357 			zbp->sparse_elements[i] = i;
    358 	}
    359 
    360 	{
    361 		align_as(64) v2 focal_vectors[countof(zbp->focal_depths)];
    362 		for (u32 i = 0; i < countof(zbp->focal_depths); i++)
    363 			focal_vectors[i] = (v2){{zbp->transmit_angles[i], zbp->focal_depths[i]}};
    364 		beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors), 0);
    365 	}
    366 
    367 	beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping), 0);
    368 	beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements), 0);
    369 	beamformer_push_parameters(&bp, 0);
    370 
    371 	free(zbp);
    372 
    373 	i32 shader_stages[16];
    374 	i32 shader_stage_count = 0;
    375 	if (options->cuda) shader_stages[shader_stage_count++] = BeamformerShaderKind_CudaDecode;
    376 	else               shader_stages[shader_stage_count++] = BeamformerShaderKind_Decode;
    377 	shader_stages[shader_stage_count++] = BeamformerShaderKind_DASCompute;
    378 
    379 	set_beamformer_pipeline(shader_stages, shader_stage_count);
    380 
    381 	stream_reset(&path, path_work_index);
    382 	i16 *data = decompress_data_at_work_index(&path, options->frame_number);
    383 
    384 	if (options->loop) {
    385 		u32 frame = 0;
    386 		f32 times[32] = {0};
    387 		f32 data_size = bp.rf_raw_dim[0] * bp.rf_raw_dim[1] * sizeof(*data);
    388 		f64 start = os_get_time();
    389 		for (;!g_should_exit;) {
    390 			if (send_frame(data, &bp)) {
    391 				f64 now   = os_get_time();
    392 				f32 delta = now - start;
    393 				start = now;
    394 
    395 				if ((frame % 16) == 0) {
    396 					f32 sum = 0;
    397 					for (u32 i = 0; i < countof(times); i++)
    398 						sum += times[i] / countof(times);
    399 					printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n",
    400 					       delta * 1e3, sum * 1e3, data_size / (sum * (GB(1))));
    401 				}
    402 
    403 				times[frame & 31] = delta;
    404 				frame++;
    405 			}
    406 		}
    407 	} else {
    408 		send_frame(data, &bp);
    409 	}
    410 
    411 	free(data);
    412 }
    413 
    414 function void
    415 sigint(i32 _signo)
    416 {
    417 	g_should_exit = 1;
    418 }
    419 
    420 extern i32
    421 main(i32 argc, char *argv[])
    422 {
    423 	Options options = parse_argv(argc, argv);
    424 
    425 	if (!BETWEEN(options.remaining_count, 1, 2))
    426 		usage(argv[0]);
    427 
    428 	os_init_timer();
    429 
    430 	signal(SIGINT, sigint);
    431 
    432 	Arena arena = os_alloc_arena(KB(8));
    433 	Stream path = stream_alloc(&arena, KB(4));
    434 	stream_append_s8(&path, c_str_to_s8(options.remaining[0]));
    435 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    436 
    437 	execute_study(c_str_to_s8(options.remaining[1]), arena, path, &options);
    438 
    439 	return 0;
    440 }