ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

decode.c (9233B)


      1 /* See LICENSE for license details. */
      2 #define LIB_FN function
      3 #include "ogl_beamformer_lib.c"
      4 
      5 #include <signal.h>
      6 #include <stdarg.h>
      7 #include <stdio.h>
      8 #include <stdlib.h>
      9 
     10 #define AVERAGE_SAMPLES countof(((BeamformerComputeStatsTable *)0)->times)
     11 //#define RF_TIME_SAMPLES 2432
     12 #define RF_TIME_SAMPLES 4096
     13 
     14 read_only global u32 decode_transmit_counts[] = {
     15 	2, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 80, 96, 128, 160, 192, 256
     16 };
     17 
     18 typedef struct {
     19 	b32 loop;
     20 	b32 cuda;
     21 	b32 once;
     22 	b32 dump;
     23 	b32 full_aperture;
     24 
     25 	u32 warmup_count;
     26 
     27 	char *outdir;
     28 
     29 	char **remaining;
     30 	i32    remaining_count;
     31 } Options;
     32 
     33 global b32 g_should_exit;
     34 
     35 #define die(...) die_((char *)__func__, __VA_ARGS__)
     36 function no_return void
     37 die_(char *function_name, char *format, ...)
     38 {
     39 	if (function_name)
     40 		fprintf(stderr, "%s: ", function_name);
     41 
     42 	va_list ap;
     43 
     44 	va_start(ap, format);
     45 	vfprintf(stderr, format, ap);
     46 	va_end(ap);
     47 
     48 	os_exit(1);
     49 }
     50 
     51 #if OS_LINUX
     52 
     53 function void
     54 os_make_directory(char *name)
     55 {
     56 	mkdir(name, 0770);
     57 }
     58 
     59 #elif OS_WINDOWS
     60 
     61 W32(b32) CreateDirectoryA(c8 *, void *);
     62 
     63 function void
     64 os_make_directory(char *name)
     65 {
     66 	CreateDirectoryA(name, 0);
     67 }
     68 
     69 #else
     70 #error Unsupported Platform
     71 #endif
     72 
     73 #define shift_n(v, c, n) v += n, c -= n
     74 #define shift(v, c)   shift_n(v, c, 1)
     75 #define unshift(v, c) shift_n(v, c, -1)
     76 
     77 function void
     78 usage(char *argv0)
     79 {
     80 	die("%s [--loop] [--once] [--full-aperture] [--cuda] [--warmup n] [--dump dir]\n"
     81 	    "    --loop:          reupload data forever\n"
     82 	    "    --once:          only run a single frame\n"
     83 	    "    --full-aperture: recieve on full 256 channel aperture\n"
     84 	    "    --cuda:          use cuda for decoding\n"
     85 	    "    --warmup:        warmup with n runs\n"
     86 	    "    --dump:          dump output stats files to dir\n",
     87 	    argv0);
     88 }
     89 
     90 function Options
     91 parse_argv(i32 argc, char *argv[])
     92 {
     93 	Options result = {0};
     94 
     95 	char *argv0 = argv[0];
     96 	shift(argv, argc);
     97 
     98 	while (argc > 0) {
     99 		s8 arg = c_str_to_s8(*argv);
    100 		shift(argv, argc);
    101 
    102 		if (s8_equal(arg, s8("--loop"))) {
    103 			result.loop = 1;
    104 		} else if (s8_equal(arg, s8("--full-aperture"))) {
    105 			result.full_aperture = 1;
    106 		} else if (s8_equal(arg, s8("--cuda"))) {
    107 			result.cuda = 1;
    108 		} else if (s8_equal(arg, s8("--dump"))) {
    109 			if (argc) {
    110 				result.outdir = *argv;
    111 				result.dump   = 1;
    112 				shift(argv, argc);
    113 			} else {
    114 				die("expected directory to dump to\n");
    115 			}
    116 		} else if (s8_equal(arg, s8("--once"))) {
    117 			result.once = 1;
    118 		} else if (s8_equal(arg, s8("--warmup"))) {
    119 			if (argc) {
    120 				result.warmup_count = (u32)atoi(*argv);
    121 				shift(argv, argc);
    122 			}
    123 		} else if (arg.len > 0 && arg.data[0] == '-') {
    124 			usage(argv0);
    125 		} else {
    126 			unshift(argv, argc);
    127 			break;
    128 		}
    129 	}
    130 
    131 	result.remaining       = argv;
    132 	result.remaining_count = argc;
    133 
    134 	return result;
    135 }
    136 
    137 function b32
    138 send_frame(i16 *restrict i16_data, u32 data_size)
    139 {
    140 	b32 result = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 0);
    141 	if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string());
    142 	return result;
    143 }
    144 
    145 function uv4
    146 decoded_data_dim(u32 transmit_count, b32 full_aperture)
    147 {
    148 	u32 max_transmits = decode_transmit_counts[countof(decode_transmit_counts) - 1];
    149 	uv4 result = {{RF_TIME_SAMPLES, full_aperture? max_transmits: transmit_count, transmit_count, 1}};
    150 	return result;
    151 }
    152 
    153 function uv2
    154 raw_data_dim(u32 transmit_count, b32 full_aperture)
    155 {
    156 	uv4 dec = decoded_data_dim(transmit_count, full_aperture);
    157 	uv2 result = {{dec.x * transmit_count,  256}};
    158 	return result;
    159 }
    160 
    161 function u32
    162 data_size_for_transmit_count(u32 transmit_count, b32 full_aperture)
    163 {
    164 	uv2 rf_dim = raw_data_dim(transmit_count, full_aperture);
    165 	u32 result = rf_dim.x * rf_dim.y * sizeof(i16);
    166 	return result;
    167 }
    168 
    169 function i16 *
    170 generate_test_data_for_transmit_count(u32 transmit_count, b32 full_aperture)
    171 {
    172 	uz  rf_size = data_size_for_transmit_count(transmit_count, full_aperture);
    173 	i16 *result = malloc(rf_size);
    174 	if (!result) die("malloc\n");
    175 	return result;
    176 }
    177 
    178 function void
    179 dump_stats(BeamformerComputeStatsTable *stats, Options *options, u32 transmit_count)
    180 {
    181 	char path_buffer[1024];
    182 	Stream sb = {.data = (u8 *)path_buffer, .cap = sizeof(path_buffer)};
    183 	stream_append_s8s(&sb, c_str_to_s8(options->outdir), s8(OS_PATH_SEPARATOR), s8("decode_"));
    184 	if (options->cuda) stream_append_s8(&sb, s8("cuda_"));
    185 	stream_append_u64(&sb, transmit_count);
    186 	stream_append_s8(&sb, s8(".bin"));
    187 	stream_append_byte(&sb, 0);
    188 	os_write_new_file(path_buffer, (s8){.len = sizeof(*stats), .data = (u8 *)stats});
    189 }
    190 
    191 function void
    192 send_parameters(Options *options, u32 transmit_count)
    193 {
    194 	BeamformerParameters bp = {0};
    195 	bp.decode_mode    = BeamformerDecodeMode_Hadamard;
    196 	b32 full_aperture = options->full_aperture;
    197 	uv3 dec_data_dim  = decoded_data_dim(transmit_count, full_aperture).xyz;
    198 	bp.sample_count      = dec_data_dim.x;
    199 	bp.channel_count     = dec_data_dim.y;
    200 	bp.acquisition_count = dec_data_dim.z;
    201 
    202 	bp.raw_data_dimensions = raw_data_dim(transmit_count, full_aperture);
    203 	beamformer_push_parameters(&bp);
    204 
    205 	/* NOTE(rnp): use real channel mapping so that we still get ~random~ access pattern */
    206 	read_only local_persist i16 channel_mapping[] = {
    207 		217, 129, 212, 188, 255, 131, 237, 190, 241, 130, 248, 187, 219, 128, 218, 181,
    208 		216, 134, 247, 180, 220, 132, 238, 178, 246, 133, 240, 179, 221, 135, 239, 173,
    209 		231, 137, 211, 172, 222, 139, 213, 170, 249, 138, 210, 171, 223, 136, 232, 189,
    210 		233, 142, 209, 164, 224, 140, 214, 186, 254, 141, 208, 163, 225, 143, 215, 185,
    211 		230, 145, 204, 162, 226, 147, 206, 165, 229, 146, 207, 161, 227, 144, 205, 182,
    212 		234, 150, 203, 160, 228, 148, 201, 166, 236, 149, 200, 159, 235, 175, 202, 177,
    213 		242, 151, 196, 191, 243, 155, 198, 167, 245, 154, 199, 158, 244, 176, 197, 174,
    214 		250, 168, 195, 184, 251, 156, 193, 152, 253, 153, 192, 157, 252, 183, 194, 169,
    215 		102,  62,  71,   3, 100,  60,  82,   1,  78,  61,  72,   4,  64,  63, 101,  10,
    216 		103,  57, 107,  11,  99,  59,  81,  13,  73,  58,  79,  12,  98,  56,  80,  18,
    217 		 88,  54, 108,  19,  97,  52, 106,  21,  70,  53, 109,  20,  96,  55,  87,   2,
    218 		 86,  49, 110,  27,  95,  51, 105,   5,  65,  50, 111,  28,  94,  48, 104,   6,
    219 		 89,  46, 115,  29,  93,  44, 113,  26,  90,  45, 112,  30,  92,  47, 114,   9,
    220 		 85,  41, 116,  31,  91,  43, 118,  25,  83,  42, 119,  32,  84,  16, 117,  14,
    221 		 77,  40, 123,   0,  76,  36, 121,  24,  74,  37, 120,  33,  75,  15, 122,  17,
    222 		 69,  23, 124,   7,  68,  35, 126,  39,  66,  38, 127,  34,  67,   8, 125,  22,
    223 	};
    224 	beamformer_push_channel_mapping(channel_mapping, countof(channel_mapping));
    225 
    226 	i32 shader_stages[1];
    227 	if (options->cuda) shader_stages[0] = BeamformerShaderKind_CudaDecode;
    228 	else               shader_stages[0] = BeamformerShaderKind_Decode;
    229 	beamformer_push_pipeline(shader_stages, 1, BeamformerDataKind_Int16);
    230 	beamformer_set_global_timeout(1000);
    231 }
    232 
    233 function f32
    234 execute_study(Options *options, u32 transmit_count, i16 *restrict data)
    235 {
    236 	send_parameters(options, transmit_count);
    237 	u32 data_size = data_size_for_transmit_count(transmit_count, options->full_aperture);
    238 	for (u32 i = 0; !g_should_exit && i < options->warmup_count; i++)
    239 		send_frame(data, data_size);
    240 
    241 	u64 start     = os_timer_count();
    242 	f64 frequency = os_timer_frequency();
    243 	for (u32 i = 0; !g_should_exit && i < AVERAGE_SAMPLES; i++)
    244 		send_frame(data, data_size);
    245 	f32 result = (os_timer_count() - start) / frequency / (f32)AVERAGE_SAMPLES;
    246 
    247 	return result;
    248 }
    249 
    250 function void
    251 print_result(u32 transmit_count, f32 time)
    252 {
    253 	printf("decode %3u | %uF Average: %8.3f [ms]\n", transmit_count, (u32)AVERAGE_SAMPLES, time * 1e3);
    254 }
    255 
    256 function void
    257 sigint(i32 _signo)
    258 {
    259 	g_should_exit = 1;
    260 }
    261 
    262 extern i32
    263 main(i32 argc, char *argv[])
    264 {
    265 	Options options = parse_argv(argc, argv);
    266 
    267 	if (options.remaining_count)
    268 		usage(argv[0]);
    269 
    270 	if (options.dump) os_make_directory(options.outdir);
    271 
    272 	signal(SIGINT, sigint);
    273 
    274 	BeamformerLiveImagingParameters lip = {.active = 1, .save_enabled = 1};
    275 	s8 short_name = s8("Decode Bench");
    276 	mem_copy(lip.save_name_tag, short_name.data, (uz)short_name.len);
    277 	lip.save_name_tag_length = (i32)short_name.len;
    278 	beamformer_set_live_parameters(&lip);
    279 
    280 	u32 max_transmit_count = decode_transmit_counts[countof(decode_transmit_counts) - 1];
    281 	i16 *data = generate_test_data_for_transmit_count(max_transmit_count, options.full_aperture);
    282 	if (options.loop) {
    283 		for (;!g_should_exit;) {
    284 			u32 transmit_count = decode_transmit_counts[0];
    285 			f32 time = execute_study(&options, transmit_count, data);
    286 			if (!g_should_exit) print_result(transmit_count, time);
    287 		}
    288 	} else if (options.once) {
    289 		u32 transmit_count = decode_transmit_counts[0];
    290 		u32 data_size = data_size_for_transmit_count(transmit_count, options.full_aperture);
    291 		send_parameters(&options, transmit_count);
    292 		send_frame(data, data_size);
    293 	} else {
    294 		BeamformerComputeStatsTable stats = {0};
    295 		for (iz i = 0; i < countof(decode_transmit_counts); i++) {
    296 			u32 transmit_count = decode_transmit_counts[i];
    297 			f32 time = execute_study(&options, transmit_count, data);
    298 			if (options.dump) {
    299 				beamformer_compute_timings(&stats, 1000);
    300 				dump_stats(&stats, &options, transmit_count);
    301 			}
    302 			if (!g_should_exit) print_result(transmit_count, time);
    303 		}
    304 	}
    305 
    306 	lip.active = 0;
    307 	beamformer_set_live_parameters(&lip);
    308 
    309 	return 0;
    310 }