ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

util.c (10682B)


      1 /* See LICENSE for license details. */
/* NOTE: 12x12 Hadamard matrix (+1/-1 entries, rows mutually orthogonal),
 * stored transposed. Used as the Kronecker base case in
 * fill_hadamard_transpose for sizes that are a multiple of 12 but not a
 * power of two. */
static i32 hadamard_12_12_transpose[] = {
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,
	1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,
	1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
	1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,
	1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,
	1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,
	1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1,
	1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1,
	1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
	1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,
	1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1,
};
     16 
     17 #define zero_struct(s) mem_clear(s, 0, sizeof(*s));
     18 static void *
     19 mem_clear(void *p_, u8 c, size len)
     20 {
     21 	u8 *p = p_;
     22 	while (len) p[--len] = c;
     23 	return p;
     24 }
     25 
     26 static void
     27 mem_copy(void *restrict src, void *restrict dest, size n)
     28 {
     29 	ASSERT(n >= 0);
     30 	u8 *s = src, *d = dest;
     31 	for (; n; n--) *d++ = *s++;
     32 }
     33 
     34 static void
     35 mem_move(u8 *src, u8 *dest, size n)
     36 {
     37 	if (dest < src) mem_copy(src, dest, n);
     38 	else            while (n) { n--; dest[n] = src[n]; }
     39 }
     40 
     41 
     42 static void
     43 arena_commit(Arena *a, size size)
     44 {
     45 	ASSERT(a->end - a->beg >= size);
     46 	a->beg += size;
     47 }
     48 
     49 #define alloc(a, t, n)    (t *)alloc_(a, sizeof(t), _Alignof(t), n)
     50 #define push_struct(a, t) (t *)alloc_(a, sizeof(t), _Alignof(t), 1)
     51 static void *
     52 alloc_(Arena *a, size len, size align, size count)
     53 {
     54 	/* NOTE: special case 0 arena */
     55 	if (a->beg == 0)
     56 		return 0;
     57 
     58 	size padding   = -(uintptr_t)a->beg & (align - 1);
     59 	size available = a->end - a->beg - padding;
     60 	if (available < 0 || count > available / len)
     61 		ASSERT(0 && "arena OOM\n");
     62 	void *p = a->beg + padding;
     63 	a->beg += padding + count * len;
     64 	/* TODO: Performance? */
     65 	return mem_clear(p, 0, count * len);
     66 }
     67 
     68 static Arena
     69 sub_arena(Arena *a, size len, size align)
     70 {
     71 	Arena result = {0};
     72 
     73 	size padding = -(uintptr_t)a->beg & (align - 1);
     74 	result.beg   = a->beg + padding;
     75 	result.end   = result.beg + len;
     76 	arena_commit(a, len + padding);
     77 
     78 	return result;
     79 }
     80 
     81 static TempArena
     82 begin_temp_arena(Arena *a)
     83 {
     84 	TempArena result = {.arena = a, .old_beg = a->beg};
     85 	return result;
     86 }
     87 
     88 static void
     89 end_temp_arena(TempArena ta)
     90 {
     91 	Arena *a = ta.arena;
     92 	if (a) {
     93 		ASSERT(a->beg >= ta.old_beg)
     94 		a->beg = ta.old_beg;
     95 	}
     96 }
     97 
     98 static Stream
     99 arena_stream(Arena *a)
    100 {
    101 	Stream result = {0};
    102 	result.data   = a->beg;
    103 	result.cap    = a->end - a->beg;
    104 	a->beg = a->end;
    105 	return result;
    106 }
    107 
    108 static Stream
    109 stream_alloc(Arena *a, size cap)
    110 {
    111 	Stream result = {.cap = cap};
    112 	result.data = alloc(a, u8, cap);
    113 	return result;
    114 }
    115 
    116 static s8
    117 stream_to_s8(Stream *s)
    118 {
    119 	s8 result = {.len = s->widx, .data = s->data};
    120 	return result;
    121 }
    122 
    123 static void
    124 stream_append_byte(Stream *s, u8 b)
    125 {
    126 	s->errors |= s->widx + 1 > s->cap;
    127 	if (!s->errors)
    128 		s->data[s->widx++] = b;
    129 }
    130 
    131 static void
    132 stream_append_s8(Stream *s, s8 str)
    133 {
    134 	s->errors |= (s->cap - s->widx) < str.len;
    135 	if (!s->errors) {
    136 		mem_copy(str.data, s->data + s->widx, str.len);
    137 		s->widx += str.len;
    138 	}
    139 }
    140 
    141 static void
    142 stream_append_s8_array(Stream *s, s8 *strs, size count)
    143 {
    144 	while (count > 0) {
    145 		stream_append_s8(s, *strs);
    146 		strs++;
    147 		count--;
    148 	}
    149 }
    150 
    151 static void
    152 stream_append_u64(Stream *s, u64 n)
    153 {
    154 	u8 tmp[64];
    155 	u8 *end = tmp + sizeof(tmp);
    156 	u8 *beg = end;
    157 	do { *--beg = '0' + (n % 10); } while (n /= 10);
    158 	stream_append_s8(s, (s8){.len = end - beg, .data = beg});
    159 }
    160 
    161 static void
    162 stream_append_i64(Stream *s, i64 n)
    163 {
    164 	if (n < 0) {
    165 		stream_append_byte(s, '-');
    166 		n *= -1;
    167 	}
    168 	stream_append_u64(s, n);
    169 }
    170 
    171 static void
    172 stream_append_f64(Stream *s, f64 f, i64 prec)
    173 {
    174 	if (f < 0) {
    175 		stream_append_byte(s, '-');
    176 		f *= -1;
    177 	}
    178 
    179 	/* NOTE: round last digit */
    180 	f += 0.5f / prec;
    181 
    182 	if (f >= (f64)(-1UL >> 1)) {
    183 		stream_append_s8(s, s8("inf"));
    184 	} else {
    185 		u64 integral = f;
    186 		u64 fraction = (f - integral) * prec;
    187 		stream_append_u64(s, integral);
    188 		stream_append_byte(s, '.');
    189 		for (i64 i = prec / 10; i > 1; i /= 10) {
    190 			if (i > fraction)
    191 				stream_append_byte(s, '0');
    192 		}
    193 		stream_append_u64(s, fraction);
    194 	}
    195 }
    196 
/* Append f in scientific notation with 2 fractional digits and a signed,
 * zero-padded 2-digit exponent (e.g. "1.23e+04").
 * NOTE(review): the normalization loops below never terminate for f < 0,
 * and f in (0, 1] normalizes with an off-by-one relative to the usual
 * mantissa-in-[1,10) convention — callers appear to pass nonnegative
 * values; confirm before relying on edge cases. */
static void
stream_append_f64_e(Stream *s, f64 f)
{
	/* TODO: there should be a better way of doing this */
	#if 0
	/* NOTE: we ignore subnormal numbers for now */
	union { f64 f; u64 u; } u = {.f = f};
	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
	f32 log_10_of_2 = 0.301f;
	i32 scale       = (exponent * log_10_of_2);
	/* NOTE: normalize f */
	for (i32 i = ABS(scale); i > 0; i--)
		f *= (scale > 0)? 0.1f : 10.0f;
	#else
	/* repeatedly scale f into the neighbourhood of 1, counting decades */
	i32 scale = 0;
	if (f != 0) {
		while (f > 1) {
			f *= 0.1f;
			scale++;
		}
		while (f < 1) {
			f *= 10.0f;
			scale--;
		}
	}
	#endif

	/* prec = 100 -> 2 fractional mantissa digits and a 2-digit exponent */
	i32 prec = 100;
	stream_append_f64(s, f, prec);
	stream_append_byte(s, 'e');
	stream_append_byte(s, scale >= 0? '+' : '-');
	/* zero-pad the exponent to match the mantissa precision */
	for (i32 i = prec / 10; i > 1; i /= 10)
		stream_append_byte(s, '0');
	stream_append_u64(s, ABS(scale));
}
    232 
    233 static void
    234 stream_append_v2(Stream *s, v2 v)
    235 {
    236 	stream_append_byte(s, '{');
    237 	stream_append_f64(s, v.x, 100);
    238 	stream_append_s8(s, s8(", "));
    239 	stream_append_f64(s, v.y, 100);
    240 	stream_append_byte(s, '}');
    241 }
    242 
    243 static void
    244 stream_append_variable(Stream *s, Variable *var)
    245 {
    246 	switch (var->type) {
    247 	case VT_F32: {
    248 		f32 *f32_val = var->store;
    249 		stream_append_f64(s, *f32_val * var->display_scale, 100);
    250 	} break;
    251 	case VT_I32: {
    252 		i32 *i32_val = var->store;
    253 		stream_append_i64(s, *i32_val * var->display_scale);
    254 	} break;
    255 	default: INVALID_CODE_PATH;
    256 	}
    257 }
    258 
    259 /* NOTE(rnp): FNV-1a hash */
    260 static u64
    261 s8_hash(s8 v)
    262 {
    263 	u64 h = 0x3243f6a8885a308d; /* digits of pi */
    264 	for (; v.len; v.len--) {
    265 		h ^= v.data[v.len - 1] & 0xFF;
    266 		h *= 1111111111111111111; /* random prime */
    267 	}
    268 	return h;
    269 }
    270 
    271 static s8
    272 cstr_to_s8(char *cstr)
    273 {
    274 	s8 result = {.data = (u8 *)cstr};
    275 	while (*cstr) { result.len++; cstr++; }
    276 	return result;
    277 }
    278 
    279 /* NOTE(rnp): returns < 0 if byte is not found */
    280 static size
    281 s8_scan_backwards(s8 s, u8 byte)
    282 {
    283 	size result = s.len;
    284 	while (result && s.data[result - 1] != byte) result--;
    285 	result--;
    286 	return result;
    287 }
    288 
    289 static s8
    290 s8_cut_head(s8 s, size cut)
    291 {
    292 	s8 result = s;
    293 	if (cut > 0) {
    294 		result.data += cut;
    295 		result.len  -= cut;
    296 	}
    297 	return result;
    298 }
    299 
    300 static s8
    301 s8alloc(Arena *a, size len)
    302 {
    303 	return (s8){ .data = alloc(a, u8, len), .len = len };
    304 }
    305 
    306 static s8
    307 push_s8(Arena *a, s8 str)
    308 {
    309 	s8 result = s8alloc(a, str.len);
    310 	mem_copy(str.data, result.data, result.len);
    311 	return result;
    312 }
    313 
    314 static u32
    315 round_down_power_of_2(u32 a)
    316 {
    317 	u32 result = 0x80000000UL >> clz_u32(a);
    318 	return result;
    319 }
    320 
    321 static b32
    322 uv2_equal(uv2 a, uv2 b)
    323 {
    324 	return a.x == b.x && a.y == b.y;
    325 }
    326 
    327 static b32
    328 uv3_equal(uv3 a, uv3 b)
    329 {
    330 	return a.x == b.x && a.y == b.y && a.z == b.z;
    331 }
    332 
    333 static v3
    334 cross(v3 a, v3 b)
    335 {
    336 	v3 result = {
    337 		.x = a.y * b.z - a.z * b.y,
    338 		.y = a.z * b.x - a.x * b.z,
    339 		.z = a.x * b.y - a.y * b.x,
    340 	};
    341 	return result;
    342 }
    343 
    344 static v3
    345 sub_v3(v3 a, v3 b)
    346 {
    347 	v3 result = {
    348 		.x = a.x - b.x,
    349 		.y = a.y - b.y,
    350 		.z = a.z - b.z,
    351 	};
    352 	return result;
    353 }
    354 
    355 static f32
    356 length_v3(v3 a)
    357 {
    358 	f32 result = a.x * a.x + a.y * a.y + a.z * a.z;
    359 	return result;
    360 }
    361 
    362 static v3
    363 normalize_v3(v3 a)
    364 {
    365 	f32 length = length_v3(a);
    366 	v3 result = {.x = a.x / length, .y = a.y / length, .z = a.z / length};
    367 	return result;
    368 }
    369 
    370 static v2
    371 clamp_v2_rect(v2 v, Rect r)
    372 {
    373 	v2 result = v;
    374 	result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x);
    375 	result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y);
    376 	return result;
    377 }
    378 
    379 static v2
    380 sub_v2(v2 a, v2 b)
    381 {
    382 	v2 result = {
    383 		.x = a.x - b.x,
    384 		.y = a.y - b.y,
    385 	};
    386 	return result;
    387 }
    388 
    389 static v2
    390 mul_v2(v2 a, v2 b)
    391 {
    392 	v2 result = {
    393 		.x = a.x * b.x,
    394 		.y = a.y * b.y,
    395 	};
    396 	return result;
    397 }
    398 
    399 
    400 static f32
    401 magnitude_v2(v2 a)
    402 {
    403 	f32 result = sqrt_f32(a.x * a.x + a.y * a.y);
    404 	return result;
    405 }
    406 
    407 static b32
    408 uv4_equal(uv4 a, uv4 b)
    409 {
    410 	return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
    411 }
    412 
    413 static v4
    414 sub_v4(v4 a, v4 b)
    415 {
    416 	v4 result;
    417 	result.x = a.x - b.x;
    418 	result.y = a.y - b.y;
    419 	result.z = a.z - b.z;
    420 	result.w = a.w - b.w;
    421 	return result;
    422 }
    423 
    424 static f64
    425 parse_f64(s8 s)
    426 {
    427 	f64 integral = 0, fractional = 0, sign = 1;
    428 
    429 	if (s.len && *s.data == '-') {
    430 		sign = -1;
    431 		s.data++;
    432 		s.len--;
    433 	}
    434 
    435 	while (s.len && *s.data != '.') {
    436 		integral *= 10;
    437 		integral += *s.data - '0';
    438 		s.data++;
    439 		s.len--;
    440 	}
    441 
    442 	if (*s.data == '.') { s.data++; s.len--; }
    443 
    444 	while (s.len) {
    445 		ASSERT(s.data[s.len - 1] != '.');
    446 		fractional /= 10;
    447 		fractional += (f64)(s.data[--s.len] - '0') / 10.0;
    448 	}
    449 	f64 result = sign * (integral + fractional);
    450 	return result;
    451 }
    452 
    453 static FileWatchDirectory *
    454 lookup_file_watch_directory(FileWatchContext *ctx, u64 hash)
    455 {
    456 	FileWatchDirectory *result = 0;
    457 
    458 	for (u32 i = 0; i < ctx->directory_watch_count; i++) {
    459 		FileWatchDirectory *test = ctx->directory_watches + i;
    460 		if (test->hash == hash) {
    461 			result = test;
    462 			break;
    463 		}
    464 	}
    465 
    466 	return result;
    467 }
    468 
    469 static void
    470 insert_file_watch(FileWatchDirectory *dir, s8 name, iptr user_data, file_watch_callback *callback)
    471 {
    472 	ASSERT(dir->file_watch_count < ARRAY_COUNT(dir->file_watches));
    473 	FileWatch *fw = dir->file_watches + dir->file_watch_count++;
    474 	fw->hash      = s8_hash(name);
    475 	fw->user_data = user_data;
    476 	fw->callback  = callback;
    477 }
    478 
/* Write scale * b into one b_dim-sized sub-block of out, stepping
 * out_stride elements between rows. The inner loop handles 4 elements per
 * iteration via the project's SIMD wrappers; values round-trip i32->f32->i32.
 * NOTE(review): assumes b_dim.x is a multiple of 4 — confirm (see the
 * %4 ASSERT in kronecker_product). */
static void
fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, uv2 b_dim)
{
	f32x4 vscale = dup_f32x4(scale);
	for (u32 i = 0; i < b_dim.y; i++) {
		for (u32 j = 0; j < b_dim.x; j += 4, b += 4) {
			f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b));
			store_i32x4(cvt_f32x4_i32x4(mul_f32x4(vscale, vb)), out + j);
		}
		out += out_stride;
	}
}
    491 
/* NOTE: this won't check for valid space/etc and assumes row major order */
/* Kronecker product out = a (x) b: each element a[i][j] stamps a scaled
 * copy of b into the corresponding sub-block of out via
 * fill_kronecker_sub_matrix. out must hold (a_dim.x*b_dim.x) x
 * (a_dim.y*b_dim.y) elements. */
static void
kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim)
{
	uv2 out_dim = {.x = a_dim.x * b_dim.x, .y = a_dim.y * b_dim.y};
	/* the sub-matrix fill processes 4 elements per SIMD step */
	ASSERT(out_dim.y % 4 == 0);
	for (u32 i = 0; i < a_dim.y; i++) {
		i32 *vout = out;
		for (u32 j = 0; j < a_dim.x; j++, a++) {
			/* out_dim.y is the distance between consecutive rows of out */
			fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim);
			vout += b_dim.y;
		}
		out += out_dim.y * b_dim.x;
	}
}
    507 
/* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
/* Build a dim x dim Hadamard matrix (transposed) into out.
 * Power-of-two sizes use the Sylvester doubling construction directly in
 * out; sizes that are a multiple of 12 build the dim/12 Sylvester matrix
 * in tmp and Kronecker-multiply it with the 12x12 base matrix.
 * Unsupported sizes return with out untouched.
 * NOTE(review): the multiple-of-12 path appears to require dim/12 to
 * itself be a power of two for the doubling loop to fill tmp — confirm. */
static void
fill_hadamard_transpose(i32 *out, i32 *tmp, u32 dim)
{
	ASSERT(dim);
	b32 power_of_2     = ISPOWEROF2(dim);
	b32 multiple_of_12 = dim % 12 == 0;

	if (!power_of_2 && !multiple_of_12)
		return;

	if (!power_of_2) {
		ASSERT(multiple_of_12);
		dim /= 12;
	}

	/* Sylvester construction target: out directly, or tmp when the result
	 * still needs the Kronecker step below */
	i32 *m;
	if (power_of_2) m = out;
	else            m = tmp;

	#define IND(i, j) ((i) * dim + (j))
	m[0] = 1;
	/* double the filled k x k block each pass: [[H H][H -H]] */
	for (u32 k = 1; k < dim; k *= 2) {
		for (u32 i = 0; i < k; i++) {
			for (u32 j = 0; j < k; j++) {
				i32 val = m[IND(i, j)];
				m[IND(i + k, j)]     =  val;
				m[IND(i, j + k)]     =  val;
				m[IND(i + k, j + k)] = -val;
			}
		}
	}
	#undef IND

	if (!power_of_2)
		kronecker_product(out, tmp, (uv2){.x = dim, .y = dim}, hadamard_12_12_transpose,
		                  (uv2){.x = 12, .y = 12});
}