ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

throughput.c (13306B)


      1 /* See LICENSE for license details. */
      2 /* TODO(rnp):
      3  * [ ]: for finer grained evaluation of throughput latency just queue a data upload
      4  *      without replacing the data.
      5  * [ ]: bug: we aren't inserting rf data between each frame
      6  */
      7 
      8 #define LIB_FN function
      9 #include "ogl_beamformer_lib.c"
     10 
     11 #include <signal.h>
     12 #include <stdarg.h>
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <zstd.h>
     16 
     17 global iv3 g_output_points    = {{512, 1, 1024}};
     18 global v2  g_axial_extent     = {{ 10e-3f, 165e-3f}};
     19 global v2  g_lateral_extent   = {{-60e-3f,  60e-3f}};
     20 global f32 g_f_number         = 0.5f;
     21 
     22 typedef struct {
     23 	b32 loop;
     24 	b32 cuda;
     25 	u32 frame_number;
     26 
     27 	char **remaining;
     28 	i32    remaining_count;
     29 } Options;
     30 
     31 #define ZEMP_BP_MAGIC (uint64_t)0x5042504D455AFECAull
     32 typedef struct {
     33 	u64 magic;
     34 	u32 version;
     35 	u16 decode_mode;
     36 	u16 beamform_mode;
     37 	u32 raw_data_dim[4];
     38 	u32 decoded_data_dim[4];
     39 	f32 xdc_element_pitch[2];
     40 	f32 xdc_transform[16]; /* NOTE: column major order */
     41 	i16 channel_mapping[256];
     42 	f32 transmit_angles[256];
     43 	f32 focal_depths[256];
     44 	i16 sparse_elements[256];
     45 	i16 hadamard_rows[256];
     46 	f32 speed_of_sound;
     47 	f32 center_frequency;
     48 	f32 sampling_frequency;
     49 	f32 time_offset;
     50 	u32 transmit_mode;
     51 } zemp_bp_v1;
     52 
     53 global b32 g_should_exit;
     54 
     55 #define die(...) die_((char *)__func__, __VA_ARGS__)
     56 function no_return void
     57 die_(char *function_name, char *format, ...)
     58 {
     59 	if (function_name)
     60 		fprintf(stderr, "%s: ", function_name);
     61 
     62 	va_list ap;
     63 
     64 	va_start(ap, format);
     65 	vfprintf(stderr, format, ap);
     66 	va_end(ap);
     67 
     68 	os_exit(1);
     69 }
     70 
     71 #if OS_LINUX
     72 
     73 #include <fcntl.h>
     74 #include <sys/stat.h>
     75 #include <unistd.h>
     76 
     77 function f64
     78 os_get_time(void)
     79 {
     80 	f64 result = (f64)os_get_timer_counter() / (f64)os_get_timer_frequency();
     81 	return result;
     82 }
     83 
     84 function s8
     85 os_read_file_simp(char *fname)
     86 {
     87 	s8 result;
     88 	i32 fd = open(fname, O_RDONLY);
     89 	if (fd < 0)
     90 		die("couldn't open file: %s\n", fname);
     91 
     92 	struct stat st;
     93 	if (stat(fname, &st) < 0)
     94 		die("couldn't stat file\n");
     95 
     96 	result.len  = st.st_size;
     97 	result.data = malloc((uz)st.st_size);
     98 	if (!result.data)
     99 		die("couldn't alloc space for reading\n");
    100 
    101 	iz rlen = read(fd, result.data, (u32)st.st_size);
    102 	close(fd);
    103 
    104 	if (rlen != st.st_size)
    105 		die("couldn't read file: %s\n", fname);
    106 
    107 	return result;
    108 }
    109 
    110 #elif OS_WINDOWS
    111 
    112 function f64
    113 os_get_time(void)
    114 {
    115 	f64 result = (f64)os_get_timer_counter() / (f64)os_w32_context.timer_frequency;
    116 	return result;
    117 }
    118 
    119 function s8
    120 os_read_file_simp(char *fname)
    121 {
    122 	s8 result;
    123 	iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0);
    124 	if (h == INVALID_FILE)
    125 		die("couldn't open file: %s\n", fname);
    126 
    127 	w32_file_info fileinfo;
    128 	if (!GetFileInformationByHandle(h, &fileinfo))
    129 		die("couldn't get file info\n", stderr);
    130 
    131 	result.len  = fileinfo.nFileSizeLow;
    132 	result.data = malloc(fileinfo.nFileSizeLow);
    133 	if (!result.data)
    134 		die("couldn't alloc space for reading\n");
    135 
    136 	i32 rlen = 0;
    137 	if (!ReadFile(h, result.data, (i32)fileinfo.nFileSizeLow, &rlen, 0) && rlen != (i32)fileinfo.nFileSizeLow)
    138 		die("couldn't read file: %s\n", fname);
    139 	CloseHandle(h);
    140 
    141 	return result;
    142 }
    143 
    144 #else
    145 #error Unsupported Platform
    146 #endif
    147 
    148 function void
    149 stream_ensure_termination(Stream *s, u8 byte)
    150 {
    151 	b32 found = 0;
    152 	if (!s->errors && s->widx > 0)
    153 		found = s->data[s->widx - 1] == byte;
    154 	if (!found) {
    155 		s->errors |= s->cap - 1 < s->widx;
    156 		if (!s->errors)
    157 			s->data[s->widx++] = byte;
    158 	}
    159 }
    160 
    161 function void *
    162 decompress_zstd_data(s8 raw)
    163 {
    164 	uz requested_size = ZSTD_getFrameContentSize(raw.data, (uz)raw.len);
    165 	void *out         = malloc(requested_size);
    166 	if (out) {
    167 		uz decompressed = ZSTD_decompress(out, requested_size, raw.data, (uz)raw.len);
    168 		if (decompressed != requested_size) {
    169 			free(out);
    170 			out = 0;
    171 		}
    172 	}
    173 	return out;
    174 }
    175 
    176 function zemp_bp_v1 *
    177 read_zemp_bp_v1(u8 *path)
    178 {
    179 	s8 raw = os_read_file_simp((char *)path);
    180 	zemp_bp_v1 *result = 0;
    181 	if (raw.len == sizeof(zemp_bp_v1) && *(u64 *)raw.data == ZEMP_BP_MAGIC) {
    182 		if (((zemp_bp_v1 *)raw.data)->version == 1)
    183 			result = (zemp_bp_v1 *)raw.data;
    184 	}
    185 	return result;
    186 }
    187 
    188 function void
    189 beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out)
    190 {
    191 	mem_copy(out->xdc_transform.E,       zbp->xdc_transform,     sizeof(out->xdc_transform));
    192 	mem_copy(out->xdc_element_pitch.E,   zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch));
    193 	mem_copy(out->raw_data_dimensions.E, zbp->raw_data_dim,      sizeof(out->raw_data_dimensions));
    194 
    195 	out->sample_count           = zbp->decoded_data_dim[0];
    196 	out->channel_count          = zbp->decoded_data_dim[1];
    197 	out->acquisition_count      = zbp->decoded_data_dim[2];
    198 	out->decode_mode            = (u8)zbp->decode_mode;
    199 	out->das_shader_id          = zbp->beamform_mode;
    200 	out->time_offset            = zbp->time_offset;
    201 	out->sampling_frequency     = zbp->sampling_frequency;
    202 	out->demodulation_frequency = zbp->center_frequency;
    203 	out->speed_of_sound         = zbp->speed_of_sound;
    204 }
    205 
    206 #define shift_n(v, c, n) v += n, c -= n
    207 #define shift(v, c) shift_n(v, c, 1)
    208 
    209 function void
    210 usage(char *argv0)
    211 {
    212 	die("%s [--loop] [--cuda] [--frame n] base_path study\n"
    213 	    "    --loop:    reupload data forever\n"
    214 	    "    --cuda:    use cuda for decoding\n"
    215 	    "    --frame n: use frame n of the data for display\n",
    216 	    argv0);
    217 }
    218 
    219 function b32
    220 s8_equal(s8 a, s8 b)
    221 {
    222 	b32 result = a.len == b.len;
    223 	for (iz i = 0; result && i < a.len; i++)
    224 		result &= a.data[i] == b.data[i];
    225 	return result;
    226 }
    227 
    228 function Options
    229 parse_argv(i32 argc, char *argv[])
    230 {
    231 	Options result = {0};
    232 
    233 	char *argv0 = argv[0];
    234 	shift(argv, argc);
    235 
    236 	while (argc > 0) {
    237 		s8 arg = c_str_to_s8(*argv);
    238 
    239 		if (s8_equal(arg, s8("--loop"))) {
    240 			shift(argv, argc);
    241 			result.loop = 1;
    242 		} else if (s8_equal(arg, s8("--cuda"))) {
    243 			shift(argv, argc);
    244 			result.cuda = 1;
    245 		} else if (s8_equal(arg, s8("--frame"))) {
    246 			shift(argv, argc);
    247 			if (argc) {
    248 				result.frame_number = (u32)atoi(*argv);
    249 				shift(argv, argc);
    250 			}
    251 		} else if (arg.len > 0 && arg.data[0] == '-') {
    252 			usage(argv0);
    253 		} else {
    254 			break;
    255 		}
    256 	}
    257 
    258 	result.remaining       = argv;
    259 	result.remaining_count = argc;
    260 
    261 	return result;
    262 }
    263 
    264 function i16 *
    265 decompress_data_at_work_index(Stream *path_base, u32 index)
    266 {
    267 	stream_append_byte(path_base, '_');
    268 	stream_append_u64_width(path_base, index, 2);
    269 	stream_append_s8(path_base, s8(".zst"));
    270 	stream_ensure_termination(path_base, 0);
    271 
    272 	s8 compressed_data = os_read_file_simp((char *)path_base->data);
    273 	i16 *result = decompress_zstd_data(compressed_data);
    274 	if (!result)
    275 		die("failed to decompress data: %s\n", path_base->data);
    276 	free(compressed_data.data);
    277 
    278 	return result;
    279 }
    280 
    281 function b32
    282 send_frame(i16 *restrict i16_data, BeamformerParameters *restrict bp)
    283 {
    284 	u32 data_size = bp->raw_data_dimensions.E[0] * bp->raw_data_dimensions.E[1] * sizeof(i16);
    285 	b32 result    = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 0);
    286 	if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string());
    287 
    288 	return result;
    289 }
    290 
    291 function void
    292 execute_study(s8 study, Arena arena, Stream path, Options *options)
    293 {
    294 	fprintf(stderr, "showing: %.*s\n", (i32)study.len, study.data);
    295 
    296 	stream_append_s8(&path, study);
    297 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    298 	stream_append_s8(&path, study);
    299 	i32 path_work_index = path.widx;
    300 
    301 	stream_append_s8(&path, s8(".bp"));
    302 	stream_ensure_termination(&path, 0);
    303 
    304 	zemp_bp_v1 *zbp = read_zemp_bp_v1(path.data);
    305 	if (!zbp) die("failed to unpack parameters file\n");
    306 
    307 	BeamformerParameters bp = {0};
    308 	beamformer_parameters_from_zemp_bp_v1(zbp, &bp);
    309 
    310 	bp.output_points.xyz = g_output_points;
    311 	bp.output_points.w   = 1;
    312 
    313 	bp.output_min_coordinate.E[0] = g_lateral_extent.x;
    314 	bp.output_min_coordinate.E[1] = 0;
    315 	bp.output_min_coordinate.E[2] = g_axial_extent.x;
    316 
    317 	bp.output_max_coordinate.E[0] = g_lateral_extent.y;
    318 	bp.output_max_coordinate.E[1] = 0;
    319 	bp.output_max_coordinate.E[2] = g_axial_extent.y;
    320 
    321 	bp.f_number           = g_f_number;
    322 	bp.beamform_plane     = 0;
    323 	bp.interpolation_mode = BeamformerInterpolationMode_Cubic;
    324 
    325 	bp.decimation_rate = 1;
    326 	bp.demodulation_frequency = bp.sampling_frequency / 4;
    327 
    328 	/* NOTE(rnp): v1 files didn't specify sampling mode. it was almost always 4X */
    329 	bp.sampling_mode = BeamformerSamplingMode_4X;
    330 
    331 	#if 0
    332 	BeamformerFilterParameters kaiser = {0};
    333 	kaiser.Kaiser.beta             = 5.65f;
    334 	kaiser.Kaiser.cutoff_frequency = 2.0e6f;
    335 	kaiser.Kaiser.length           = 36;
    336 
    337 	beamformer_create_filter(BeamformerFilterKind_Kaiser, (f32 *)&kaiser.kaiser,
    338 	                         sizeof(kaiser.kaiser), bp.sampling_frequency / 2, 0, 0, 0);
    339 	beamformer_set_pipeline_stage_parameters(0, 0);
    340 	#endif
    341 
    342 	#if 1
    343 	BeamformerFilterParameters matched = {0};
    344 	typeof(matched.matched_chirp) *mp = &matched.matched_chirp;
    345 	mp->duration      = 18e-6f;
    346 	mp->min_frequency = 2.9e6f - bp.demodulation_frequency;
    347 	mp->max_frequency = 6.0e6f - bp.demodulation_frequency;
    348 
    349 	bp.time_offset += mp->duration / 2;
    350 
    351 	beamformer_create_filter(BeamformerFilterKind_MatchedChirp, (f32 *)mp, sizeof(*mp),
    352 	                         bp.sampling_frequency / 2, 1, 0, 0);
    353 	beamformer_set_pipeline_stage_parameters(0, 0);
    354 	#endif
    355 
    356 	if (zbp->sparse_elements[0] == -1) {
    357 		for (i16 i = 0; i < countof(zbp->sparse_elements); i++)
    358 			zbp->sparse_elements[i] = i;
    359 	}
    360 
    361 	b32 tx_rows = (zbp->transmit_mode & (1 << 1)) == 0;
    362 	b32 rx_rows = (zbp->transmit_mode & (1 << 0)) == 0;
    363 	u8 packed_tx_rx = 0;
    364 	if (tx_rows) packed_tx_rx |= BeamformerRCAOrientation_Rows    << 4;
    365 	else         packed_tx_rx |= BeamformerRCAOrientation_Columns << 4;
    366 	if (rx_rows) packed_tx_rx |= BeamformerRCAOrientation_Rows    << 0;
    367 	else         packed_tx_rx |= BeamformerRCAOrientation_Columns << 0;
    368 
    369 	if (bp.das_shader_id == BeamformerAcquisitionKind_HERCULES ||
    370 	    bp.das_shader_id == BeamformerAcquisitionKind_UHERCULES)
    371 	{
    372 		bp.single_focus       = 1;
    373 		bp.single_orientation = 1;
    374 
    375 		bp.transmit_receive_orientation = packed_tx_rx;
    376 		bp.focal_vector.E[0] = zbp->transmit_angles[0];
    377 		bp.focal_vector.E[1] = zbp->focal_depths[0];
    378 	} else {
    379 		alignas(64) v2 focal_vectors[BeamformerMaxChannelCount];
    380 		for (u32 i = 0; i < countof(focal_vectors); i++)
    381 			focal_vectors[i] = (v2){{zbp->transmit_angles[i], zbp->focal_depths[i]}};
    382 		beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors));
    383 
    384 		alignas(64) u8 transmit_receive_orientations[BeamformerMaxChannelCount];
    385 		for (u32 i = 0; i < countof(transmit_receive_orientations); i++)
    386 			transmit_receive_orientations[i] = packed_tx_rx;
    387 		beamformer_push_transmit_receive_orientations(transmit_receive_orientations,
    388 		                                              countof(transmit_receive_orientations));
    389 	}
    390 
    391 	beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping));
    392 	beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements));
    393 	beamformer_push_parameters(&bp);
    394 
    395 	i32 shader_stages[16];
    396 	u32 shader_stage_count = 0;
    397 	shader_stages[shader_stage_count++] = BeamformerShaderKind_Demodulate;
    398 	if (options->cuda) shader_stages[shader_stage_count++] = BeamformerShaderKind_CudaDecode;
    399 	else               shader_stages[shader_stage_count++] = BeamformerShaderKind_Decode;
    400 	shader_stages[shader_stage_count++] = BeamformerShaderKind_DAS;
    401 
    402 	beamformer_push_pipeline(shader_stages, shader_stage_count, BeamformerDataKind_Int16);
    403 
    404 	beamformer_set_global_timeout(1000);
    405 
    406 	stream_reset(&path, path_work_index);
    407 	i16 *data = decompress_data_at_work_index(&path, options->frame_number);
    408 
    409 	if (options->loop) {
    410 		BeamformerLiveImagingParameters lip = {.active = 1, .save_enabled = 1};
    411 		s8 short_name = s8("Throughput");
    412 		mem_copy(lip.save_name_tag, short_name.data, (uz)short_name.len);
    413 		lip.save_name_tag_length = (i32)short_name.len;
    414 		beamformer_set_live_parameters(&lip);
    415 
    416 		u32 frame = 0;
    417 		f32 times[32] = {0};
    418 		f32 data_size = (f32)(bp.raw_data_dimensions.E[0] * bp.raw_data_dimensions.E[1] * sizeof(*data));
    419 		f64 start = os_get_time();
    420 		for (;!g_should_exit;) {
    421 			if (send_frame(data, &bp)) {
    422 				f64 now   = os_get_time();
    423 				f32 delta = (f32)(now - start);
    424 				start = now;
    425 
    426 				if ((frame % 16) == 0) {
    427 					f32 sum = 0;
    428 					for (u32 i = 0; i < countof(times); i++)
    429 						sum += times[i] / countof(times);
    430 					printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n",
    431 					       delta * 1e3, sum * 1e3, data_size / (sum * (GB(1))));
    432 				}
    433 
    434 				times[frame % countof(times)] = delta;
    435 				frame++;
    436 			}
    437 			i32 flag = beamformer_live_parameters_get_dirty_flag();
    438 			if (flag != -1 && (1 << flag) == BeamformerLiveImagingDirtyFlags_StopImaging)
    439 				break;
    440 		}
    441 
    442 		lip.active = 0;
    443 		beamformer_set_live_parameters(&lip);
    444 	} else {
    445 		for (u32 i = 0; i < zbp->raw_data_dim[2]; i++)
    446 			send_frame(data + i * bp.raw_data_dimensions.E[0] * bp.raw_data_dimensions.E[1], &bp);
    447 	}
    448 
    449 	free(zbp);
    450 	free(data);
    451 }
    452 
    453 function void
    454 sigint(i32 _signo)
    455 {
    456 	g_should_exit = 1;
    457 }
    458 
    459 extern i32
    460 main(i32 argc, char *argv[])
    461 {
    462 	Options options = parse_argv(argc, argv);
    463 
    464 	if (!BETWEEN(options.remaining_count, 1, 2))
    465 		usage(argv[0]);
    466 
    467 	os_common_init();
    468 
    469 	signal(SIGINT, sigint);
    470 
    471 	Arena arena = os_alloc_arena(KB(8));
    472 	Stream path = stream_alloc(&arena, KB(4));
    473 	stream_append_s8(&path, c_str_to_s8(options.remaining[0]));
    474 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    475 
    476 	execute_study(c_str_to_s8(options.remaining[1]), arena, path, &options);
    477 
    478 	return 0;
    479 }