decode.c (9233B)
1 /* See LICENSE for license details. */ 2 #define LIB_FN function 3 #include "ogl_beamformer_lib.c" 4 5 #include <signal.h> 6 #include <stdarg.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 10 #define AVERAGE_SAMPLES countof(((BeamformerComputeStatsTable *)0)->times) 11 //#define RF_TIME_SAMPLES 2432 12 #define RF_TIME_SAMPLES 4096 13 14 read_only global u32 decode_transmit_counts[] = { 15 2, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 80, 96, 128, 160, 192, 256 16 }; 17 18 typedef struct { 19 b32 loop; 20 b32 cuda; 21 b32 once; 22 b32 dump; 23 b32 full_aperture; 24 25 u32 warmup_count; 26 27 char *outdir; 28 29 char **remaining; 30 i32 remaining_count; 31 } Options; 32 33 global b32 g_should_exit; 34 35 #define die(...) die_((char *)__func__, __VA_ARGS__) 36 function no_return void 37 die_(char *function_name, char *format, ...) 38 { 39 if (function_name) 40 fprintf(stderr, "%s: ", function_name); 41 42 va_list ap; 43 44 va_start(ap, format); 45 vfprintf(stderr, format, ap); 46 va_end(ap); 47 48 os_exit(1); 49 } 50 51 #if OS_LINUX 52 53 function void 54 os_make_directory(char *name) 55 { 56 mkdir(name, 0770); 57 } 58 59 #elif OS_WINDOWS 60 61 W32(b32) CreateDirectoryA(c8 *, void *); 62 63 function void 64 os_make_directory(char *name) 65 { 66 CreateDirectoryA(name, 0); 67 } 68 69 #else 70 #error Unsupported Platform 71 #endif 72 73 #define shift_n(v, c, n) v += n, c -= n 74 #define shift(v, c) shift_n(v, c, 1) 75 #define unshift(v, c) shift_n(v, c, -1) 76 77 function void 78 usage(char *argv0) 79 { 80 die("%s [--loop] [--once] [--full-aperture] [--cuda] [--warmup n] [--dump dir]\n" 81 " --loop: reupload data forever\n" 82 " --once: only run a single frame\n" 83 " --full-aperture: recieve on full 256 channel aperture\n" 84 " --cuda: use cuda for decoding\n" 85 " --warmup: warmup with n runs\n" 86 " --dump: dump output stats files to dir\n", 87 argv0); 88 } 89 90 function Options 91 parse_argv(i32 argc, char *argv[]) 92 { 93 Options result = {0}; 94 95 char *argv0 = argv[0]; 96 shift(argv, argc); 97 98 while (argc > 0) { 99 s8 arg = c_str_to_s8(*argv); 100 shift(argv, argc); 101 102 if (s8_equal(arg, s8("--loop"))) { 103 result.loop = 1; 104 } else if (s8_equal(arg, s8("--full-aperture"))) { 105 result.full_aperture = 1; 106 } else if (s8_equal(arg, s8("--cuda"))) { 107 result.cuda = 1; 108 } else if (s8_equal(arg, s8("--dump"))) { 109 if (argc) { 110 result.outdir = *argv; 111 result.dump = 1; 112 shift(argv, argc); 113 } else { 114 die("expected directory to dump to\n"); 115 } 116 } else if (s8_equal(arg, s8("--once"))) { 117 result.once = 1; 118 } else if (s8_equal(arg, s8("--warmup"))) { 119 if (argc) { 120 result.warmup_count = (u32)atoi(*argv); 121 shift(argv, argc); 122 } 123 } else if (arg.len > 0 && arg.data[0] == '-') { 124 usage(argv0); 125 } else { 126 unshift(argv, argc); 127 break; 128 } 129 } 130 131 result.remaining = argv; 132 result.remaining_count = argc; 133 134 return result; 135 } 136 137 function b32 138 send_frame(i16 *restrict i16_data, u32 data_size) 139 { 140 b32 result = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 0); 141 if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string()); 142 return result; 143 } 144 145 function uv4 146 decoded_data_dim(u32 transmit_count, b32 full_aperture) 147 { 148 u32 max_transmits = decode_transmit_counts[countof(decode_transmit_counts) - 1]; 149 uv4 result = {{RF_TIME_SAMPLES, full_aperture? max_transmits: transmit_count, transmit_count, 1}}; 150 return result; 151 } 152 153 function uv2 154 raw_data_dim(u32 transmit_count, b32 full_aperture) 155 { 156 uv4 dec = decoded_data_dim(transmit_count, full_aperture); 157 uv2 result = {{dec.x * transmit_count, 256}}; 158 return result; 159 } 160 161 function u32 162 data_size_for_transmit_count(u32 transmit_count, b32 full_aperture) 163 { 164 uv2 rf_dim = raw_data_dim(transmit_count, full_aperture); 165 u32 result = rf_dim.x * rf_dim.y * sizeof(i16); 166 return result; 167 } 168 169 function i16 * 170 generate_test_data_for_transmit_count(u32 transmit_count, b32 full_aperture) 171 { 172 uz rf_size = data_size_for_transmit_count(transmit_count, full_aperture); 173 i16 *result = malloc(rf_size); 174 if (!result) die("malloc\n"); 175 return result; 176 } 177 178 function void 179 dump_stats(BeamformerComputeStatsTable *stats, Options *options, u32 transmit_count) 180 { 181 char path_buffer[1024]; 182 Stream sb = {.data = (u8 *)path_buffer, .cap = sizeof(path_buffer)}; 183 stream_append_s8s(&sb, c_str_to_s8(options->outdir), s8(OS_PATH_SEPARATOR), s8("decode_")); 184 if (options->cuda) stream_append_s8(&sb, s8("cuda_")); 185 stream_append_u64(&sb, transmit_count); 186 stream_append_s8(&sb, s8(".bin")); 187 stream_append_byte(&sb, 0); 188 os_write_new_file(path_buffer, (s8){.len = sizeof(*stats), .data = (u8 *)stats}); 189 } 190 191 function void 192 send_parameters(Options *options, u32 transmit_count) 193 { 194 BeamformerParameters bp = {0}; 195 bp.decode_mode = BeamformerDecodeMode_Hadamard; 196 b32 full_aperture = options->full_aperture; 197 uv3 dec_data_dim = decoded_data_dim(transmit_count, full_aperture).xyz; 198 bp.sample_count = dec_data_dim.x; 199 bp.channel_count = dec_data_dim.y; 200 bp.acquisition_count = dec_data_dim.z; 201 202 bp.raw_data_dimensions = raw_data_dim(transmit_count, full_aperture); 203 beamformer_push_parameters(&bp); 204 205 /* NOTE(rnp): use real channel mapping so that we still get ~random~ access pattern */ 206 read_only local_persist i16 channel_mapping[] = { 207 217, 129, 212, 188, 255, 131, 237, 190, 241, 130, 248, 187, 219, 128, 218, 181, 208 216, 134, 247, 180, 220, 132, 238, 178, 246, 133, 240, 179, 221, 135, 239, 173, 209 231, 137, 211, 172, 222, 139, 213, 170, 249, 138, 210, 171, 223, 136, 232, 189, 210 233, 142, 209, 164, 224, 140, 214, 186, 254, 141, 208, 163, 225, 143, 215, 185, 211 230, 145, 204, 162, 226, 147, 206, 165, 229, 146, 207, 161, 227, 144, 205, 182, 212 234, 150, 203, 160, 228, 148, 201, 166, 236, 149, 200, 159, 235, 175, 202, 177, 213 242, 151, 196, 191, 243, 155, 198, 167, 245, 154, 199, 158, 244, 176, 197, 174, 214 250, 168, 195, 184, 251, 156, 193, 152, 253, 153, 192, 157, 252, 183, 194, 169, 215 102, 62, 71, 3, 100, 60, 82, 1, 78, 61, 72, 4, 64, 63, 101, 10, 216 103, 57, 107, 11, 99, 59, 81, 13, 73, 58, 79, 12, 98, 56, 80, 18, 217 88, 54, 108, 19, 97, 52, 106, 21, 70, 53, 109, 20, 96, 55, 87, 2, 218 86, 49, 110, 27, 95, 51, 105, 5, 65, 50, 111, 28, 94, 48, 104, 6, 219 89, 46, 115, 29, 93, 44, 113, 26, 90, 45, 112, 30, 92, 47, 114, 9, 220 85, 41, 116, 31, 91, 43, 118, 25, 83, 42, 119, 32, 84, 16, 117, 14, 221 77, 40, 123, 0, 76, 36, 121, 24, 74, 37, 120, 33, 75, 15, 122, 17, 222 69, 23, 124, 7, 68, 35, 126, 39, 66, 38, 127, 34, 67, 8, 125, 22, 223 }; 224 beamformer_push_channel_mapping(channel_mapping, countof(channel_mapping)); 225 226 i32 shader_stages[1]; 227 if (options->cuda) shader_stages[0] = BeamformerShaderKind_CudaDecode; 228 else shader_stages[0] = BeamformerShaderKind_Decode; 229 beamformer_push_pipeline(shader_stages, 1, BeamformerDataKind_Int16); 230 beamformer_set_global_timeout(1000); 231 } 232 233 function f32 234 execute_study(Options *options, u32 transmit_count, i16 *restrict data) 235 { 236 send_parameters(options, transmit_count); 237 u32 data_size = data_size_for_transmit_count(transmit_count, options->full_aperture); 238 for (u32 i = 0; !g_should_exit && i < options->warmup_count; i++) 239 send_frame(data, data_size); 240 241 u64 start = os_timer_count(); 242 f64 frequency = os_timer_frequency(); 243 for (u32 i = 0; !g_should_exit && i < AVERAGE_SAMPLES; i++) 244 send_frame(data, data_size); 245 f32 result = (os_timer_count() - start) / frequency / (f32)AVERAGE_SAMPLES; 246 247 return result; 248 } 249 250 function void 251 print_result(u32 transmit_count, f32 time) 252 { 253 printf("decode %3u | %uF Average: %8.3f [ms]\n", transmit_count, (u32)AVERAGE_SAMPLES, time * 1e3); 254 } 255 256 function void 257 sigint(i32 _signo) 258 { 259 g_should_exit = 1; 260 } 261 262 extern i32 263 main(i32 argc, char *argv[]) 264 { 265 Options options = parse_argv(argc, argv); 266 267 if (options.remaining_count) 268 usage(argv[0]); 269 270 if (options.dump) os_make_directory(options.outdir); 271 272 signal(SIGINT, sigint); 273 274 BeamformerLiveImagingParameters lip = {.active = 1, .save_enabled = 1}; 275 s8 short_name = s8("Decode Bench"); 276 mem_copy(lip.save_name_tag, short_name.data, (uz)short_name.len); 277 lip.save_name_tag_length = (i32)short_name.len; 278 beamformer_set_live_parameters(&lip); 279 280 u32 max_transmit_count = decode_transmit_counts[countof(decode_transmit_counts) - 1]; 281 i16 *data = generate_test_data_for_transmit_count(max_transmit_count, options.full_aperture); 282 if (options.loop) { 283 for (;!g_should_exit;) { 284 u32 transmit_count = decode_transmit_counts[0]; 285 f32 time = execute_study(&options, transmit_count, data); 286 if (!g_should_exit) print_result(transmit_count, time); 287 } 288 } else if (options.once) { 289 u32 transmit_count = decode_transmit_counts[0]; 290 u32 data_size = data_size_for_transmit_count(transmit_count, options.full_aperture); 291 send_parameters(&options, transmit_count); 292 send_frame(data, data_size); 293 } else { 294 BeamformerComputeStatsTable stats = {0}; 295 for (iz i = 0; i < countof(decode_transmit_counts); i++) { 296 u32 transmit_count = decode_transmit_counts[i]; 297 f32 time = execute_study(&options, transmit_count, data); 298 if (options.dump) { 299 beamformer_compute_timings(&stats, 1000); 300 dump_stats(&stats, &options, transmit_count); 301 } 302 if (!g_should_exit) print_result(transmit_count, time); 303 } 304 } 305 306 lip.active = 0; 307 beamformer_set_live_parameters(&lip); 308 309 return 0; 310 }