ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

throughput.c (13379B)


      1 /* See LICENSE for license details. */
      2 /* TODO(rnp):
      3  * [ ]: for finer grained evaluation of throughput latency just queue a data upload
      4  *      without replacing the data.
      5  * [ ]: bug: we aren't inserting rf data between each frame
      6  */
      7 
      8 #define LIB_FN function
      9 #include "ogl_beamformer_lib.c"
     10 
     11 #include <signal.h>
     12 #include <stdarg.h>
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <zstd.h>
     16 
     17 global i32 g_output_points[4] = {512, 1, 1024, 1};
     18 global v2  g_axial_extent     = {{ 10e-3f, 165e-3f}};
     19 global v2  g_lateral_extent   = {{-60e-3f,  60e-3f}};
     20 global f32 g_f_number         = 0.5f;
     21 
     22 typedef struct {
     23 	b32 loop;
     24 	b32 cuda;
     25 	u32 frame_number;
     26 
     27 	char **remaining;
     28 	i32    remaining_count;
     29 } Options;
     30 
     31 #define ZEMP_BP_MAGIC (uint64_t)0x5042504D455AFECAull
     32 typedef struct {
     33 	u64 magic;
     34 	u32 version;
     35 	u16 decode_mode;
     36 	u16 beamform_mode;
     37 	u32 raw_data_dim[4];
     38 	u32 decoded_data_dim[4];
     39 	f32 xdc_element_pitch[2];
     40 	f32 xdc_transform[16]; /* NOTE: column major order */
     41 	i16 channel_mapping[256];
     42 	f32 transmit_angles[256];
     43 	f32 focal_depths[256];
     44 	i16 sparse_elements[256];
     45 	i16 hadamard_rows[256];
     46 	f32 speed_of_sound;
     47 	f32 center_frequency;
     48 	f32 sampling_frequency;
     49 	f32 time_offset;
     50 	u32 transmit_mode;
     51 } zemp_bp_v1;
     52 
     53 global b32 g_should_exit;
     54 
     55 #define die(...) die_((char *)__func__, __VA_ARGS__)
     56 function no_return void
     57 die_(char *function_name, char *format, ...)
     58 {
     59 	if (function_name)
     60 		fprintf(stderr, "%s: ", function_name);
     61 
     62 	va_list ap;
     63 
     64 	va_start(ap, format);
     65 	vfprintf(stderr, format, ap);
     66 	va_end(ap);
     67 
     68 	os_exit(1);
     69 }
     70 
     71 #if OS_LINUX
     72 
     73 #include <fcntl.h>
     74 #include <sys/stat.h>
     75 #include <unistd.h>
     76 
     77 function void os_init_timer(void) { }
     78 
     79 function f64
     80 os_get_time(void)
     81 {
     82 	f64 result = (f64)os_get_timer_counter() / (f64)os_get_timer_frequency();
     83 	return result;
     84 }
     85 
     86 function s8
     87 os_read_file_simp(char *fname)
     88 {
     89 	s8 result;
     90 	i32 fd = open(fname, O_RDONLY);
     91 	if (fd < 0)
     92 		die("couldn't open file: %s\n", fname);
     93 
     94 	struct stat st;
     95 	if (stat(fname, &st) < 0)
     96 		die("couldn't stat file\n");
     97 
     98 	result.len  = st.st_size;
     99 	result.data = malloc((uz)st.st_size);
    100 	if (!result.data)
    101 		die("couldn't alloc space for reading\n");
    102 
    103 	iz rlen = read(fd, result.data, (u32)st.st_size);
    104 	close(fd);
    105 
    106 	if (rlen != st.st_size)
    107 		die("couldn't read file: %s\n", fname);
    108 
    109 	return result;
    110 }
    111 
    112 #elif OS_WINDOWS
    113 
    114 global w32_context os_context;
    115 
    116 function void
    117 os_init_timer(void)
    118 {
    119 	os_context.timer_frequency = os_get_timer_frequency();
    120 }
    121 
    122 function f64
    123 os_get_time(void)
    124 {
    125 	f64 result = (f64)os_get_timer_counter() / (f64)os_context.timer_frequency;
    126 	return result;
    127 }
    128 
    129 function s8
    130 os_read_file_simp(char *fname)
    131 {
    132 	s8 result;
    133 	iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0);
    134 	if (h == INVALID_FILE)
    135 		die("couldn't open file: %s\n", fname);
    136 
    137 	w32_file_info fileinfo;
    138 	if (!GetFileInformationByHandle(h, &fileinfo))
    139 		die("couldn't get file info\n", stderr);
    140 
    141 	result.len  = fileinfo.nFileSizeLow;
    142 	result.data = malloc(fileinfo.nFileSizeLow);
    143 	if (!result.data)
    144 		die("couldn't alloc space for reading\n");
    145 
    146 	i32 rlen = 0;
    147 	if (!ReadFile(h, result.data, (i32)fileinfo.nFileSizeLow, &rlen, 0) && rlen != (i32)fileinfo.nFileSizeLow)
    148 		die("couldn't read file: %s\n", fname);
    149 	CloseHandle(h);
    150 
    151 	return result;
    152 }
    153 
    154 #else
    155 #error Unsupported Platform
    156 #endif
    157 
    158 function void
    159 stream_ensure_termination(Stream *s, u8 byte)
    160 {
    161 	b32 found = 0;
    162 	if (!s->errors && s->widx > 0)
    163 		found = s->data[s->widx - 1] == byte;
    164 	if (!found) {
    165 		s->errors |= s->cap - 1 < s->widx;
    166 		if (!s->errors)
    167 			s->data[s->widx++] = byte;
    168 	}
    169 }
    170 
    171 function void *
    172 decompress_zstd_data(s8 raw)
    173 {
    174 	uz requested_size = ZSTD_getFrameContentSize(raw.data, (uz)raw.len);
    175 	void *out         = malloc(requested_size);
    176 	if (out) {
    177 		uz decompressed = ZSTD_decompress(out, requested_size, raw.data, (uz)raw.len);
    178 		if (decompressed != requested_size) {
    179 			free(out);
    180 			out = 0;
    181 		}
    182 	}
    183 	return out;
    184 }
    185 
    186 function zemp_bp_v1 *
    187 read_zemp_bp_v1(u8 *path)
    188 {
    189 	s8 raw = os_read_file_simp((char *)path);
    190 	zemp_bp_v1 *result = 0;
    191 	if (raw.len == sizeof(zemp_bp_v1) && *(u64 *)raw.data == ZEMP_BP_MAGIC) {
    192 		if (((zemp_bp_v1 *)raw.data)->version == 1)
    193 			result = (zemp_bp_v1 *)raw.data;
    194 	}
    195 	return result;
    196 }
    197 
    198 function void
    199 beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out)
    200 {
    201 	mem_copy(out->xdc_transform,       zbp->xdc_transform,     sizeof(out->xdc_transform));
    202 	mem_copy(out->xdc_element_pitch,   zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch));
    203 	mem_copy(out->raw_data_dimensions, zbp->raw_data_dim,      sizeof(out->raw_data_dimensions));
    204 
    205 	out->sample_count           = zbp->decoded_data_dim[0];
    206 	out->channel_count          = zbp->decoded_data_dim[1];
    207 	out->acquisition_count      = zbp->decoded_data_dim[2];
    208 	out->decode_mode            = (u8)zbp->decode_mode;
    209 	out->das_shader_id          = zbp->beamform_mode;
    210 	out->time_offset            = zbp->time_offset;
    211 	out->sampling_frequency     = zbp->sampling_frequency;
    212 	out->demodulation_frequency = zbp->center_frequency;
    213 	out->speed_of_sound         = zbp->speed_of_sound;
    214 }
    215 
    216 #define shift_n(v, c, n) v += n, c -= n
    217 #define shift(v, c) shift_n(v, c, 1)
    218 
    219 function void
    220 usage(char *argv0)
    221 {
    222 	die("%s [--loop] [--cuda] [--frame n] base_path study\n"
    223 	    "    --loop:    reupload data forever\n"
    224 	    "    --cuda:    use cuda for decoding\n"
    225 	    "    --frame n: use frame n of the data for display\n",
    226 	    argv0);
    227 }
    228 
    229 function b32
    230 s8_equal(s8 a, s8 b)
    231 {
    232 	b32 result = a.len == b.len;
    233 	for (iz i = 0; result && i < a.len; i++)
    234 		result &= a.data[i] == b.data[i];
    235 	return result;
    236 }
    237 
    238 function Options
    239 parse_argv(i32 argc, char *argv[])
    240 {
    241 	Options result = {0};
    242 
    243 	char *argv0 = argv[0];
    244 	shift(argv, argc);
    245 
    246 	while (argc > 0) {
    247 		s8 arg = c_str_to_s8(*argv);
    248 
    249 		if (s8_equal(arg, s8("--loop"))) {
    250 			shift(argv, argc);
    251 			result.loop = 1;
    252 		} else if (s8_equal(arg, s8("--cuda"))) {
    253 			shift(argv, argc);
    254 			result.cuda = 1;
    255 		} else if (s8_equal(arg, s8("--frame"))) {
    256 			shift(argv, argc);
    257 			if (argc) {
    258 				result.frame_number = (u32)atoi(*argv);
    259 				shift(argv, argc);
    260 			}
    261 		} else if (arg.len > 0 && arg.data[0] == '-') {
    262 			usage(argv0);
    263 		} else {
    264 			break;
    265 		}
    266 	}
    267 
    268 	result.remaining       = argv;
    269 	result.remaining_count = argc;
    270 
    271 	return result;
    272 }
    273 
    274 function i16 *
    275 decompress_data_at_work_index(Stream *path_base, u32 index)
    276 {
    277 	stream_append_byte(path_base, '_');
    278 	stream_append_u64_width(path_base, index, 2);
    279 	stream_append_s8(path_base, s8(".zst"));
    280 	stream_ensure_termination(path_base, 0);
    281 
    282 	s8 compressed_data = os_read_file_simp((char *)path_base->data);
    283 	i16 *result = decompress_zstd_data(compressed_data);
    284 	if (!result)
    285 		die("failed to decompress data: %s\n", path_base->data);
    286 	free(compressed_data.data);
    287 
    288 	return result;
    289 }
    290 
    291 function b32
    292 send_frame(i16 *restrict i16_data, BeamformerParameters *restrict bp)
    293 {
    294 	u32 data_size = bp->raw_data_dimensions[0] * bp->raw_data_dimensions[1] * sizeof(i16);
    295 	b32 result    = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 0);
    296 	if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string());
    297 
    298 	return result;
    299 }
    300 
    301 function void
    302 execute_study(s8 study, Arena arena, Stream path, Options *options)
    303 {
    304 	fprintf(stderr, "showing: %.*s\n", (i32)study.len, study.data);
    305 
    306 	stream_append_s8(&path, study);
    307 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    308 	stream_append_s8(&path, study);
    309 	i32 path_work_index = path.widx;
    310 
    311 	stream_append_s8(&path, s8(".bp"));
    312 	stream_ensure_termination(&path, 0);
    313 
    314 	zemp_bp_v1 *zbp = read_zemp_bp_v1(path.data);
    315 	if (!zbp) die("failed to unpack parameters file\n");
    316 
    317 	BeamformerParameters bp = {0};
    318 	beamformer_parameters_from_zemp_bp_v1(zbp, &bp);
    319 
    320 	mem_copy(bp.output_points, g_output_points, sizeof(bp.output_points));
    321 	bp.output_points[3] = 1;
    322 
    323 	bp.output_min_coordinate[0] = g_lateral_extent.x;
    324 	bp.output_min_coordinate[1] = 0;
    325 	bp.output_min_coordinate[2] = g_axial_extent.x;
    326 
    327 	bp.output_max_coordinate[0] = g_lateral_extent.y;
    328 	bp.output_max_coordinate[1] = 0;
    329 	bp.output_max_coordinate[2] = g_axial_extent.y;
    330 
    331 	bp.f_number           = g_f_number;
    332 	bp.beamform_plane     = 0;
    333 	bp.interpolation_mode = BeamformerInterpolationMode_Cubic;
    334 
    335 	bp.decimation_rate = 1;
    336 	bp.demodulation_frequency = bp.sampling_frequency / 4;
    337 
    338 	/* NOTE(rnp): v1 files didn't specify sampling mode. it was almost always 4X */
    339 	bp.sampling_mode = BeamformerSamplingMode_4X;
    340 
    341 	#if 0
    342 	BeamformerFilterParameters kaiser = {0};
    343 	kaiser.Kaiser.beta             = 5.65f;
    344 	kaiser.Kaiser.cutoff_frequency = 2.0e6f;
    345 	kaiser.Kaiser.length           = 36;
    346 
    347 	beamformer_create_filter(BeamformerFilterKind_Kaiser, (f32 *)&kaiser.Kaiser,
    348 	                         sizeof(kaiser.Kaiser) / sizeof(f32), bp.sampling_frequency / 2, 0, 0, 0);
    349 	beamformer_set_pipeline_stage_parameters(0, 0);
    350 	#endif
    351 
    352 	#if 1
    353 	BeamformerFilterParameters matched = {0};
    354 	typeof(matched.MatchedChirp) *mp = &matched.MatchedChirp;
    355 	mp->duration      = 18e-6f;
    356 	mp->min_frequency = 2.9e6f - bp.demodulation_frequency;
    357 	mp->max_frequency = 6.0e6f - bp.demodulation_frequency;
    358 
    359 	bp.time_offset += mp->duration / 2;
    360 
    361 	beamformer_create_filter(BeamformerFilterKind_MatchedChirp, (f32 *)&matched.MatchedChirp,
    362 	                         sizeof(matched.MatchedChirp) / sizeof(f32), bp.sampling_frequency / 2,
    363 	                         1, 0, 0);
    364 	beamformer_set_pipeline_stage_parameters(0, 0);
    365 	#endif
    366 
    367 	if (zbp->sparse_elements[0] == -1) {
    368 		for (i16 i = 0; i < countof(zbp->sparse_elements); i++)
    369 			zbp->sparse_elements[i] = i;
    370 	}
    371 
    372 	b32 tx_rows = (zbp->transmit_mode & (1 << 1)) == 0;
    373 	b32 rx_rows = (zbp->transmit_mode & (1 << 0)) == 0;
    374 	u8 packed_tx_rx = 0;
    375 	if (tx_rows) packed_tx_rx |= BeamformerRCAOrientation_Rows    << 4;
    376 	else         packed_tx_rx |= BeamformerRCAOrientation_Columns << 4;
    377 	if (rx_rows) packed_tx_rx |= BeamformerRCAOrientation_Rows    << 0;
    378 	else         packed_tx_rx |= BeamformerRCAOrientation_Columns << 0;
    379 
    380 	if (bp.das_shader_id == BeamformerAcquisitionKind_HERCULES ||
    381 	    bp.das_shader_id == BeamformerAcquisitionKind_UHERCULES)
    382 	{
    383 		bp.single_focus       = 1;
    384 		bp.single_orientation = 1;
    385 
    386 		bp.transmit_receive_orientation = packed_tx_rx;
    387 		bp.focal_vector[0] = zbp->transmit_angles[0];
    388 		bp.focal_vector[1] = zbp->focal_depths[0];
    389 	} else {
    390 		alignas(64) v2 focal_vectors[BeamformerMaxChannelCount];
    391 		for (u32 i = 0; i < countof(focal_vectors); i++)
    392 			focal_vectors[i] = (v2){{zbp->transmit_angles[i], zbp->focal_depths[i]}};
    393 		beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors));
    394 
    395 		alignas(64) u8 transmit_receive_orientations[BeamformerMaxChannelCount];
    396 		for (u32 i = 0; i < countof(transmit_receive_orientations); i++)
    397 			transmit_receive_orientations[i] = packed_tx_rx;
    398 		beamformer_push_transmit_receive_orientations(transmit_receive_orientations,
    399 		                                              countof(transmit_receive_orientations));
    400 	}
    401 
    402 	beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping));
    403 	beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements));
    404 	beamformer_push_parameters(&bp);
    405 
    406 	i32 shader_stages[16];
    407 	u32 shader_stage_count = 0;
    408 	shader_stages[shader_stage_count++] = BeamformerShaderKind_Demodulate;
    409 	if (options->cuda) shader_stages[shader_stage_count++] = BeamformerShaderKind_CudaDecode;
    410 	else               shader_stages[shader_stage_count++] = BeamformerShaderKind_Decode;
    411 	shader_stages[shader_stage_count++] = BeamformerShaderKind_DAS;
    412 
    413 	beamformer_push_pipeline(shader_stages, shader_stage_count, BeamformerDataKind_Int16);
    414 
    415 	beamformer_set_global_timeout(1000);
    416 
    417 	stream_reset(&path, path_work_index);
    418 	i16 *data = decompress_data_at_work_index(&path, options->frame_number);
    419 
    420 	if (options->loop) {
    421 		BeamformerLiveImagingParameters lip = {.active = 1};
    422 		beamformer_set_live_parameters(&lip);
    423 
    424 		u32 frame = 0;
    425 		f32 times[32] = {0};
    426 		f32 data_size = (f32)(bp.raw_data_dimensions[0] * bp.raw_data_dimensions[1] * sizeof(*data));
    427 		f64 start = os_get_time();
    428 		for (;!g_should_exit;) {
    429 			if (send_frame(data, &bp)) {
    430 				f64 now   = os_get_time();
    431 				f32 delta = (f32)(now - start);
    432 				start = now;
    433 
    434 				if ((frame % 16) == 0) {
    435 					f32 sum = 0;
    436 					for (u32 i = 0; i < countof(times); i++)
    437 						sum += times[i] / countof(times);
    438 					printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n",
    439 					       delta * 1e3, sum * 1e3, data_size / (sum * (GB(1))));
    440 				}
    441 
    442 				times[frame % countof(times)] = delta;
    443 				frame++;
    444 			}
    445 			i32 flag = beamformer_live_parameters_get_dirty_flag();
    446 			if (flag != -1 && (1 << flag) == BeamformerLiveImagingDirtyFlags_StopImaging)
    447 				break;
    448 		}
    449 
    450 		lip.active = 0;
    451 		beamformer_set_live_parameters(&lip);
    452 	} else {
    453 		for (u32 i = 0; i < zbp->raw_data_dim[2]; i++)
    454 			send_frame(data + i * bp.raw_data_dimensions[0] * bp.raw_data_dimensions[1], &bp);
    455 	}
    456 
    457 	free(zbp);
    458 	free(data);
    459 }
    460 
    461 function void
    462 sigint(i32 _signo)
    463 {
    464 	g_should_exit = 1;
    465 }
    466 
    467 extern i32
    468 main(i32 argc, char *argv[])
    469 {
    470 	Options options = parse_argv(argc, argv);
    471 
    472 	if (!BETWEEN(options.remaining_count, 1, 2))
    473 		usage(argv[0]);
    474 
    475 	os_init_timer();
    476 
    477 	signal(SIGINT, sigint);
    478 
    479 	Arena arena = os_alloc_arena(KB(8));
    480 	Stream path = stream_alloc(&arena, KB(4));
    481 	stream_append_s8(&path, c_str_to_s8(options.remaining[0]));
    482 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    483 
    484 	execute_study(c_str_to_s8(options.remaining[1]), arena, path, &options);
    485 
    486 	return 0;
    487 }