ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

throughput.c (13100B)


      1 /* See LICENSE for license details. */
      2 /* TODO(rnp):
      3  * [ ]: for finer grained evaluation of throughput latency just queue a data upload
      4  *      without replacing the data.
      5  * [ ]: bug: we aren't inserting rf data between each frame
      6  */
      7 
      8 #define BEAMFORMER_LIB_EXPORT function
      9 #include "ogl_beamformer_lib.c"
     10 
     11 #include <signal.h>
     12 #include <stdarg.h>
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <zstd.h>
     16 
     17 global iv3 g_output_points    = {{512, 1, 1024}};
     18 global v2  g_axial_extent     = {{ 10e-3f, 165e-3f}};
     19 global v2  g_lateral_extent   = {{-60e-3f,  60e-3f}};
     20 global f32 g_f_number         = 0.5f;
     21 
     22 typedef struct {
     23 	b32 loop;
     24 	b32 cuda;
     25 	u32 frame_number;
     26 
     27 	char **remaining;
     28 	i32    remaining_count;
     29 } Options;
     30 
     31 #define ZEMP_BP_MAGIC (uint64_t)0x5042504D455AFECAull
     32 typedef struct {
     33 	u64 magic;
     34 	u32 version;
     35 	u16 decode_mode;
     36 	u16 beamform_mode;
     37 	u32 raw_data_dim[4];
     38 	u32 decoded_data_dim[4];
     39 	f32 xdc_element_pitch[2];
     40 	f32 xdc_transform[16]; /* NOTE: column major order */
     41 	i16 channel_mapping[256];
     42 	f32 transmit_angles[256];
     43 	f32 focal_depths[256];
     44 	i16 sparse_elements[256];
     45 	i16 hadamard_rows[256];
     46 	f32 speed_of_sound;
     47 	f32 center_frequency;
     48 	f32 sampling_frequency;
     49 	f32 time_offset;
     50 	u32 transmit_mode;
     51 } zemp_bp_v1;
     52 
     53 global b32 g_should_exit;
     54 
     55 #define die(...) die_((char *)__func__, __VA_ARGS__)
     56 function no_return void
     57 die_(char *function_name, char *format, ...)
     58 {
     59 	if (function_name)
     60 		fprintf(stderr, "%s: ", function_name);
     61 
     62 	va_list ap;
     63 
     64 	va_start(ap, format);
     65 	vfprintf(stderr, format, ap);
     66 	va_end(ap);
     67 
     68 	os_exit(1);
     69 }
     70 
     71 #if OS_LINUX
     72 
     73 #include <fcntl.h>
     74 #include <sys/stat.h>
     75 #include <unistd.h>
     76 
     77 function s8
     78 os_read_file_simp(char *fname)
     79 {
     80 	s8 result;
     81 	i32 fd = open(fname, O_RDONLY);
     82 	if (fd < 0)
     83 		die("couldn't open file: %s\n", fname);
     84 
     85 	struct stat st;
     86 	if (stat(fname, &st) < 0)
     87 		die("couldn't stat file\n");
     88 
     89 	result.len  = st.st_size;
     90 	result.data = malloc((uz)st.st_size);
     91 	if (!result.data)
     92 		die("couldn't alloc space for reading\n");
     93 
     94 	iz rlen = read(fd, result.data, (u32)st.st_size);
     95 	close(fd);
     96 
     97 	if (rlen != st.st_size)
     98 		die("couldn't read file: %s\n", fname);
     99 
    100 	return result;
    101 }
    102 
    103 #elif OS_WINDOWS
    104 
    105 function s8
    106 os_read_file_simp(char *fname)
    107 {
    108 	s8 result;
    109 	iptr h = CreateFileA(fname, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0);
    110 	if (h == INVALID_FILE)
    111 		die("couldn't open file: %s\n", fname);
    112 
    113 	w32_file_info fileinfo;
    114 	if (!GetFileInformationByHandle(h, &fileinfo))
    115 		die("couldn't get file info\n", stderr);
    116 
    117 	result.len  = fileinfo.nFileSizeLow;
    118 	result.data = malloc(fileinfo.nFileSizeLow);
    119 	if (!result.data)
    120 		die("couldn't alloc space for reading\n");
    121 
    122 	i32 rlen = 0;
    123 	if (!ReadFile(h, result.data, (i32)fileinfo.nFileSizeLow, &rlen, 0) && rlen != (i32)fileinfo.nFileSizeLow)
    124 		die("couldn't read file: %s\n", fname);
    125 	CloseHandle(h);
    126 
    127 	return result;
    128 }
    129 
    130 #else
    131 #error Unsupported Platform
    132 #endif
    133 
    134 function void
    135 stream_ensure_termination(Stream *s, u8 byte)
    136 {
    137 	b32 found = 0;
    138 	if (!s->errors && s->widx > 0)
    139 		found = s->data[s->widx - 1] == byte;
    140 	if (!found) {
    141 		s->errors |= s->cap - 1 < s->widx;
    142 		if (!s->errors)
    143 			s->data[s->widx++] = byte;
    144 	}
    145 }
    146 
    147 function void *
    148 decompress_zstd_data(s8 raw)
    149 {
    150 	uz requested_size = ZSTD_getFrameContentSize(raw.data, (uz)raw.len);
    151 	void *out         = malloc(requested_size);
    152 	if (out) {
    153 		uz decompressed = ZSTD_decompress(out, requested_size, raw.data, (uz)raw.len);
    154 		if (decompressed != requested_size) {
    155 			free(out);
    156 			out = 0;
    157 		}
    158 	}
    159 	return out;
    160 }
    161 
    162 function zemp_bp_v1 *
    163 read_zemp_bp_v1(u8 *path)
    164 {
    165 	s8 raw = os_read_file_simp((char *)path);
    166 	zemp_bp_v1 *result = 0;
    167 	if (raw.len == sizeof(zemp_bp_v1) && *(u64 *)raw.data == ZEMP_BP_MAGIC) {
    168 		if (((zemp_bp_v1 *)raw.data)->version == 1)
    169 			result = (zemp_bp_v1 *)raw.data;
    170 	}
    171 	return result;
    172 }
    173 
    174 function void
    175 beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out)
    176 {
    177 	mem_copy(out->xdc_transform.E,       zbp->xdc_transform,     sizeof(out->xdc_transform));
    178 	mem_copy(out->xdc_element_pitch.E,   zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch));
    179 	mem_copy(out->raw_data_dimensions.E, zbp->raw_data_dim,      sizeof(out->raw_data_dimensions));
    180 
    181 	out->sample_count           = zbp->decoded_data_dim[0];
    182 	out->channel_count          = zbp->decoded_data_dim[1];
    183 	out->acquisition_count      = zbp->decoded_data_dim[2];
    184 	out->decode_mode            = (u8)zbp->decode_mode;
    185 	out->acquisition_kind       = zbp->beamform_mode;
    186 	out->time_offset            = zbp->time_offset;
    187 	out->sampling_frequency     = zbp->sampling_frequency;
    188 	out->demodulation_frequency = zbp->center_frequency;
    189 	out->speed_of_sound         = zbp->speed_of_sound;
    190 }
    191 
    192 #define shift_n(v, c, n) v += n, c -= n
    193 #define shift(v, c) shift_n(v, c, 1)
    194 
    195 function void
    196 usage(char *argv0)
    197 {
    198 	die("%s [--loop] [--cuda] [--frame n] base_path study\n"
    199 	    "    --loop:    reupload data forever\n"
    200 	    "    --cuda:    use cuda for decoding\n"
    201 	    "    --frame n: use frame n of the data for display\n",
    202 	    argv0);
    203 }
    204 
    205 function b32
    206 s8_equal(s8 a, s8 b)
    207 {
    208 	b32 result = a.len == b.len;
    209 	for (iz i = 0; result && i < a.len; i++)
    210 		result &= a.data[i] == b.data[i];
    211 	return result;
    212 }
    213 
    214 function Options
    215 parse_argv(i32 argc, char *argv[])
    216 {
    217 	Options result = {0};
    218 
    219 	char *argv0 = argv[0];
    220 	shift(argv, argc);
    221 
    222 	while (argc > 0) {
    223 		s8 arg = c_str_to_s8(*argv);
    224 
    225 		if (s8_equal(arg, s8("--loop"))) {
    226 			shift(argv, argc);
    227 			result.loop = 1;
    228 		} else if (s8_equal(arg, s8("--cuda"))) {
    229 			shift(argv, argc);
    230 			result.cuda = 1;
    231 		} else if (s8_equal(arg, s8("--frame"))) {
    232 			shift(argv, argc);
    233 			if (argc) {
    234 				result.frame_number = (u32)atoi(*argv);
    235 				shift(argv, argc);
    236 			}
    237 		} else if (arg.len > 0 && arg.data[0] == '-') {
    238 			usage(argv0);
    239 		} else {
    240 			break;
    241 		}
    242 	}
    243 
    244 	result.remaining       = argv;
    245 	result.remaining_count = argc;
    246 
    247 	return result;
    248 }
    249 
    250 function i16 *
    251 decompress_data_at_work_index(Stream *path_base, u32 index)
    252 {
    253 	stream_append_byte(path_base, '_');
    254 	stream_append_u64_width(path_base, index, 2);
    255 	stream_append_s8(path_base, s8(".zst"));
    256 	stream_ensure_termination(path_base, 0);
    257 
    258 	s8 compressed_data = os_read_file_simp((char *)path_base->data);
    259 	i16 *result = decompress_zstd_data(compressed_data);
    260 	if (!result)
    261 		die("failed to decompress data: %s\n", path_base->data);
    262 	free(compressed_data.data);
    263 
    264 	return result;
    265 }
    266 
    267 function b32
    268 send_frame(i16 *restrict i16_data, BeamformerParameters *restrict bp)
    269 {
    270 	u32 data_size = bp->raw_data_dimensions.E[0] * bp->raw_data_dimensions.E[1] * sizeof(i16);
    271 	b32 result    = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 0);
    272 	if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string());
    273 
    274 	return result;
    275 }
    276 
    277 function void
    278 execute_study(s8 study, Arena arena, Stream path, Options *options)
    279 {
    280 	fprintf(stderr, "showing: %.*s\n", (i32)study.len, study.data);
    281 
    282 	stream_append_s8(&path, study);
    283 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    284 	stream_append_s8(&path, study);
    285 	i32 path_work_index = path.widx;
    286 
    287 	stream_append_s8(&path, s8(".bp"));
    288 	stream_ensure_termination(&path, 0);
    289 
    290 	zemp_bp_v1 *zbp = read_zemp_bp_v1(path.data);
    291 	if (!zbp) die("failed to unpack parameters file\n");
    292 
    293 	BeamformerParameters bp = {0};
    294 	beamformer_parameters_from_zemp_bp_v1(zbp, &bp);
    295 
    296 	bp.output_points.xyz = g_output_points;
    297 	bp.output_points.w   = 1;
    298 
    299 	bp.output_min_coordinate.E[0] = g_lateral_extent.x;
    300 	bp.output_min_coordinate.E[1] = 0;
    301 	bp.output_min_coordinate.E[2] = g_axial_extent.x;
    302 
    303 	bp.output_max_coordinate.E[0] = g_lateral_extent.y;
    304 	bp.output_max_coordinate.E[1] = 0;
    305 	bp.output_max_coordinate.E[2] = g_axial_extent.y;
    306 
    307 	bp.f_number           = g_f_number;
    308 	bp.beamform_plane     = 0;
    309 	bp.interpolation_mode = BeamformerInterpolationMode_Cubic;
    310 
    311 	bp.decimation_rate = 1;
    312 	bp.demodulation_frequency = bp.sampling_frequency / 4;
    313 
    314 	/* NOTE(rnp): v1 files didn't specify sampling mode. it was almost always 4X */
    315 	bp.sampling_mode = BeamformerSamplingMode_4X;
    316 
    317 	#if 0
    318 	BeamformerFilterParameters kaiser = {0};
    319 	kaiser.Kaiser.beta             = 5.65f;
    320 	kaiser.Kaiser.cutoff_frequency = 2.0e6f;
    321 	kaiser.Kaiser.length           = 36;
    322 
    323 	beamformer_create_filter(BeamformerFilterKind_Kaiser, (f32 *)&kaiser.kaiser,
    324 	                         sizeof(kaiser.kaiser), bp.sampling_frequency / 2, 0, 0, 0);
    325 	beamformer_set_pipeline_stage_parameters(0, 0);
    326 	#endif
    327 
    328 	#if 1
    329 	BeamformerFilterParameters matched = {0};
    330 	typeof(matched.matched_chirp) *mp = &matched.matched_chirp;
    331 	mp->duration      = 18e-6f;
    332 	mp->min_frequency = 2.9e6f - bp.demodulation_frequency;
    333 	mp->max_frequency = 6.0e6f - bp.demodulation_frequency;
    334 
    335 	bp.time_offset += mp->duration / 2;
    336 
    337 	beamformer_create_filter(BeamformerFilterKind_MatchedChirp, (f32 *)mp, sizeof(*mp),
    338 	                         bp.sampling_frequency / 2, 1, 0, 0);
    339 	beamformer_set_pipeline_stage_parameters(0, 0);
    340 	#endif
    341 
    342 	if (zbp->sparse_elements[0] == -1) {
    343 		for (i16 i = 0; i < countof(zbp->sparse_elements); i++)
    344 			zbp->sparse_elements[i] = i;
    345 	}
    346 
    347 	b32 tx_rows = (zbp->transmit_mode & (1 << 1)) == 0;
    348 	b32 rx_rows = (zbp->transmit_mode & (1 << 0)) == 0;
    349 	u8 packed_tx_rx = 0;
    350 	if (tx_rows) packed_tx_rx |= BeamformerRCAOrientation_Rows    << 4;
    351 	else         packed_tx_rx |= BeamformerRCAOrientation_Columns << 4;
    352 	if (rx_rows) packed_tx_rx |= BeamformerRCAOrientation_Rows    << 0;
    353 	else         packed_tx_rx |= BeamformerRCAOrientation_Columns << 0;
    354 
    355 	if (bp.acquisition_kind == BeamformerAcquisitionKind_HERCULES ||
    356 	    bp.acquisition_kind == BeamformerAcquisitionKind_UHERCULES)
    357 	{
    358 		bp.single_focus       = 1;
    359 		bp.single_orientation = 1;
    360 
    361 		bp.transmit_receive_orientation = packed_tx_rx;
    362 		bp.focal_vector.E[0] = zbp->transmit_angles[0];
    363 		bp.focal_vector.E[1] = zbp->focal_depths[0];
    364 	} else {
    365 		alignas(64) v2 focal_vectors[BeamformerMaxChannelCount];
    366 		for (u32 i = 0; i < countof(focal_vectors); i++)
    367 			focal_vectors[i] = (v2){{zbp->transmit_angles[i], zbp->focal_depths[i]}};
    368 		beamformer_push_focal_vectors((f32 *)focal_vectors, countof(focal_vectors));
    369 
    370 		alignas(64) u8 transmit_receive_orientations[BeamformerMaxChannelCount];
    371 		for (u32 i = 0; i < countof(transmit_receive_orientations); i++)
    372 			transmit_receive_orientations[i] = packed_tx_rx;
    373 		beamformer_push_transmit_receive_orientations(transmit_receive_orientations,
    374 		                                              countof(transmit_receive_orientations));
    375 	}
    376 
    377 	beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping));
    378 	beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements));
    379 	beamformer_push_parameters(&bp);
    380 
    381 	i32 shader_stages[16];
    382 	u32 shader_stage_count = 0;
    383 	shader_stages[shader_stage_count++] = BeamformerShaderKind_Demodulate;
    384 	if (options->cuda) shader_stages[shader_stage_count++] = BeamformerShaderKind_CudaDecode;
    385 	else               shader_stages[shader_stage_count++] = BeamformerShaderKind_Decode;
    386 	shader_stages[shader_stage_count++] = BeamformerShaderKind_DAS;
    387 
    388 	beamformer_push_pipeline(shader_stages, shader_stage_count, BeamformerDataKind_Int16);
    389 
    390 	beamformer_set_global_timeout(1000);
    391 
    392 	stream_reset(&path, path_work_index);
    393 	i16 *data = decompress_data_at_work_index(&path, options->frame_number);
    394 
    395 	if (options->loop) {
    396 		BeamformerLiveImagingParameters lip = {.active = 1, .save_enabled = 1};
    397 		s8 short_name = s8("Throughput");
    398 		mem_copy(lip.save_name_tag, short_name.data, (uz)short_name.len);
    399 		lip.save_name_tag_length = (i32)short_name.len;
    400 		beamformer_set_live_parameters(&lip);
    401 
    402 		u32 frame = 0;
    403 		f32 times[32] = {0};
    404 		f32 data_size = (f32)(bp.raw_data_dimensions.E[0] * bp.raw_data_dimensions.E[1] * sizeof(*data));
    405 		u64 start = os_timer_count();
    406 		f64 frequency = os_timer_frequency();
    407 		for (;!g_should_exit;) {
    408 			if (send_frame(data, &bp)) {
    409 				u64 now   = os_timer_count();
    410 				f64 delta = (now - start) / frequency;
    411 				start = now;
    412 
    413 				if ((frame % 16) == 0) {
    414 					f32 sum = 0;
    415 					for (u32 i = 0; i < countof(times); i++)
    416 						sum += times[i] / countof(times);
    417 					printf("Frame Time: %8.3f [ms] | 32-Frame Average: %8.3f [ms] | %8.3f GB/s\n",
    418 					       delta * 1e3, sum * 1e3, data_size / (sum * (GB(1))));
    419 				}
    420 
    421 				times[frame % countof(times)] = delta;
    422 				frame++;
    423 			}
    424 			i32 flag = beamformer_live_parameters_get_dirty_flag();
    425 			if (flag != -1 && (1 << flag) == BeamformerLiveImagingDirtyFlags_StopImaging)
    426 				break;
    427 		}
    428 
    429 		lip.active = 0;
    430 		beamformer_set_live_parameters(&lip);
    431 	} else {
    432 		for (u32 i = 0; i < zbp->raw_data_dim[2]; i++)
    433 			send_frame(data + i * bp.raw_data_dimensions.E[0] * bp.raw_data_dimensions.E[1], &bp);
    434 	}
    435 
    436 	free(zbp);
    437 	free(data);
    438 }
    439 
    440 function void
    441 sigint(i32 _signo)
    442 {
    443 	g_should_exit = 1;
    444 }
    445 
    446 extern i32
    447 main(i32 argc, char *argv[])
    448 {
    449 	Options options = parse_argv(argc, argv);
    450 
    451 	if (!BETWEEN(options.remaining_count, 1, 2))
    452 		usage(argv[0]);
    453 
    454 	signal(SIGINT, sigint);
    455 
    456 	Arena arena = os_alloc_arena(KB(8));
    457 	Stream path = stream_alloc(&arena, KB(4));
    458 	stream_append_s8(&path, c_str_to_s8(options.remaining[0]));
    459 	stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
    460 
    461 	execute_study(c_str_to_s8(options.remaining[1]), arena, path, &options);
    462 
    463 	return 0;
    464 }