ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

util.c (17512B)


      1 /* See LICENSE for license details. */
      2 #define zero_struct(s) mem_clear(s, 0, sizeof(*s))
      3 function void *
      4 mem_clear(void *restrict p_, u8 c, iz size)
      5 {
      6 	u8 *p = p_;
      7 	while (size > 0) p[--size] = c;
      8 	return p;
      9 }
     10 
     11 function void
     12 mem_copy(void *restrict dest, void *restrict src, uz n)
     13 {
     14 	u8 *s = src, *d = dest;
     15 	for (; n; n--) *d++ = *s++;
     16 }
     17 
     18 function void
     19 mem_move(u8 *dest, u8 *src, iz n)
     20 {
     21 	if (dest < src) mem_copy(dest, src, n);
     22 	else            while (n) { n--; dest[n] = src[n]; }
     23 }
     24 
     25 
     26 function u8 *
     27 arena_commit(Arena *a, iz size)
     28 {
     29 	ASSERT(a->end - a->beg >= size);
     30 	u8 *result = a->beg;
     31 	a->beg += size;
     32 	return result;
     33 }
     34 
     35 function void
     36 arena_pop(Arena *a, iz length)
     37 {
     38 	a->beg -= length;
     39 }
     40 
     41 #define push_array(a, t, n) (t *)arena_alloc(a, sizeof(t), _Alignof(t), n)
     42 #define push_struct(a, t)   (t *)arena_alloc(a, sizeof(t), _Alignof(t), 1)
     43 function void *
     44 arena_alloc(Arena *a, iz len, iz align, iz count)
     45 {
     46 	/* NOTE: special case 0 arena */
     47 	if (a->beg == 0)
     48 		return 0;
     49 
     50 	iz padding   = -(uintptr_t)a->beg & (align - 1);
     51 	iz available = a->end - a->beg - padding;
     52 	if (available < 0 || count > available / len)
     53 		ASSERT(0 && "arena OOM\n");
     54 	void *p = a->beg + padding;
     55 	a->beg += padding + count * len;
     56 	/* TODO: Performance? */
     57 	return mem_clear(p, 0, count * len);
     58 }
     59 
     60 #define arena_capacity(a, t) arena_capacity_(a, sizeof(t), _Alignof(t))
     61 function iz
     62 arena_capacity_(Arena *a, iz size, iz alignment)
     63 {
     64 	iz padding   = -(uintptr_t)a->beg & (alignment - 1);
     65 	iz available = a->end - a->beg - padding;
     66 	iz result    = available / size;
     67 	return result;
     68 }
     69 
     70 enum { DA_INITIAL_CAP = 4 };
     71 #define da_reserve(a, s, n) \
     72   (s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \
     73                           _Alignof(typeof(*(s)->data)), sizeof(*(s)->data))
     74 
     75 #define da_push(a, s) \
     76   ((s)->count == (s)->capacity  \
     77     ? da_reserve(a, s, 1),      \
     78       (s)->data + (s)->count++  \
     79     : (s)->data + (s)->count++)
     80 
     81 function void *
     82 da_reserve_(Arena *a, void *data, iz *capacity, iz needed, iz align, iz size)
     83 {
     84 	iz cap = *capacity;
     85 
     86 	/* NOTE(rnp): handle both 0 initialized DAs and DAs that need to be moved (they started
     87 	 * on the stack or someone allocated something in the middle of the arena during usage) */
     88 	if (!data || a->beg != (u8 *)data + cap * size) {
     89 		void *copy = arena_alloc(a, size, align, cap);
     90 		if (data) mem_copy(copy, data, cap * size);
     91 		data = copy;
     92 	}
     93 
     94 	if (!cap) cap = DA_INITIAL_CAP;
     95 	while (cap < needed) cap *= 2;
     96 	arena_alloc(a, size, align, cap - *capacity);
     97 	*capacity = cap;
     98 	return data;
     99 }
    100 
    101 function Arena
    102 sub_arena(Arena *a, iz len, iz align)
    103 {
    104 	Arena result = {0};
    105 
    106 	iz padding = -(uintptr_t)a->beg & (align - 1);
    107 	result.beg   = a->beg + padding;
    108 	result.end   = result.beg + len;
    109 	arena_commit(a, len + padding);
    110 
    111 	return result;
    112 }
    113 
    114 function TempArena
    115 begin_temp_arena(Arena *a)
    116 {
    117 	TempArena result = {.arena = a, .old_beg = a->beg};
    118 	return result;
    119 }
    120 
    121 function void
    122 end_temp_arena(TempArena ta)
    123 {
    124 	Arena *a = ta.arena;
    125 	if (a) {
    126 		assert(a->beg >= ta.old_beg);
    127 		a->beg = ta.old_beg;
    128 	}
    129 }
    130 
    131 function u32
    132 utf8_encode(u8 *out, u32 cp)
    133 {
    134 	u32 result = 1;
    135 	if (cp <= 0x7F) {
    136 		out[0] = cp & 0x7F;
    137 	} else if (cp <= 0x7FF) {
    138 		result = 2;
    139 		out[0] = ((cp >>  6) & 0x1F) | 0xC0;
    140 		out[1] = ((cp >>  0) & 0x3F) | 0x80;
    141 	} else if (cp <= 0xFFFF) {
    142 		result = 3;
    143 		out[0] = ((cp >> 12) & 0x0F) | 0xE0;
    144 		out[1] = ((cp >>  6) & 0x3F) | 0x80;
    145 		out[2] = ((cp >>  0) & 0x3F) | 0x80;
    146 	} else if (cp <= 0x10FFFF) {
    147 		result = 4;
    148 		out[0] = ((cp >> 18) & 0x07) | 0xF0;
    149 		out[1] = ((cp >> 12) & 0x3F) | 0x80;
    150 		out[2] = ((cp >>  6) & 0x3F) | 0x80;
    151 		out[3] = ((cp >>  0) & 0x3F) | 0x80;
    152 	} else {
    153 		out[0] = '?';
    154 	}
    155 	return result;
    156 }
    157 
    158 function UnicodeDecode
    159 utf16_decode(u16 *data, iz length)
    160 {
    161 	UnicodeDecode result = {.cp = U32_MAX};
    162 	if (length) {
    163 		result.consumed = 1;
    164 		result.cp = data[0];
    165 		if (length > 1 && BETWEEN(data[0], 0xD800, 0xDBFF)
    166 		               && BETWEEN(data[1], 0xDC00, 0xDFFF))
    167 		{
    168 			result.consumed = 2;
    169 			result.cp = ((data[0] - 0xD800) << 10) | ((data[1] - 0xDC00) + 0x10000);
    170 		}
    171 	}
    172 	return result;
    173 }
    174 
    175 function u32
    176 utf16_encode(u16 *out, u32 cp)
    177 {
    178 	u32 result = 1;
    179 	if (cp == U32_MAX) {
    180 		out[0] = '?';
    181 	} else if (cp < 0x10000) {
    182 		out[0] = cp;
    183 	} else {
    184 		u32 value = cp - 0x10000;
    185 		out[0] = 0xD800 + (value >> 10u);
    186 		out[1] = 0xDC00 + (value & 0x3FFu);
    187 		result = 2;
    188 	}
    189 	return result;
    190 }
    191 
    192 function Stream
    193 stream_alloc(Arena *a, iz cap)
    194 {
    195 	Stream result = {.cap = cap};
    196 	result.data = push_array(a, u8, cap);
    197 	return result;
    198 }
    199 
    200 function s8
    201 stream_to_s8(Stream *s)
    202 {
    203 	s8 result = s8("");
    204 	if (!s->errors) result = (s8){.len = s->widx, .data = s->data};
    205 	return result;
    206 }
    207 
    208 function void
    209 stream_reset(Stream *s, iz index)
    210 {
    211 	s->errors = s->cap <= index;
    212 	if (!s->errors)
    213 		s->widx = index;
    214 }
    215 
    216 function void
    217 stream_commit(Stream *s, iz count)
    218 {
    219 	s->errors |= !BETWEEN(s->widx + count, 0, s->cap);
    220 	if (!s->errors)
    221 		s->widx += count;
    222 }
    223 
    224 function void
    225 stream_append(Stream *s, void *data, iz count)
    226 {
    227 	s->errors |= (s->cap - s->widx) < count;
    228 	if (!s->errors) {
    229 		mem_copy(s->data + s->widx, data, count);
    230 		s->widx += count;
    231 	}
    232 }
    233 
    234 function void
    235 stream_append_byte(Stream *s, u8 b)
    236 {
    237 	stream_append(s, &b, 1);
    238 }
    239 
    240 function void
    241 stream_pad(Stream *s, u8 b, i32 n)
    242 {
    243 	while (n > 0) stream_append_byte(s, b), n--;
    244 }
    245 
    246 function void
    247 stream_append_s8(Stream *s, s8 str)
    248 {
    249 	stream_append(s, str.data, str.len);
    250 }
    251 
    252 #define stream_append_s8s(s, ...) stream_append_s8s_(s, arg_list(s8, ##__VA_ARGS__))
    253 function void
    254 stream_append_s8s_(Stream *s, s8 *strs, iz count)
    255 {
    256 	for (iz i = 0; i < count; i++)
    257 		stream_append(s, strs[i].data, strs[i].len);
    258 }
    259 
    260 function void
    261 stream_append_u64(Stream *s, u64 n)
    262 {
    263 	u8 tmp[64];
    264 	u8 *end = tmp + sizeof(tmp);
    265 	u8 *beg = end;
    266 	do { *--beg = '0' + (n % 10); } while (n /= 10);
    267 	stream_append(s, beg, end - beg);
    268 }
    269 
    270 function void
    271 stream_append_hex_u64(Stream *s, u64 n)
    272 {
    273 	if (!s->errors) {
    274 		u8  buf[16];
    275 		u8 *end = buf + sizeof(buf);
    276 		u8 *beg = end;
    277 		while (n) {
    278 			*--beg = "0123456789abcdef"[n & 0x0F];
    279 			n >>= 4;
    280 		}
    281 		while (end - beg < 2)
    282 			*--beg = '0';
    283 		stream_append(s, beg, end - beg);
    284 	}
    285 }
    286 
    287 function void
    288 stream_append_i64(Stream *s, i64 n)
    289 {
    290 	if (n < 0) {
    291 		stream_append_byte(s, '-');
    292 		n *= -1;
    293 	}
    294 	stream_append_u64(s, n);
    295 }
    296 
    297 function void
    298 stream_append_f64(Stream *s, f64 f, i64 prec)
    299 {
    300 	if (f < 0) {
    301 		stream_append_byte(s, '-');
    302 		f *= -1;
    303 	}
    304 
    305 	/* NOTE: round last digit */
    306 	f += 0.5f / prec;
    307 
    308 	if (f >= (f64)(-1UL >> 1)) {
    309 		stream_append_s8(s, s8("inf"));
    310 	} else {
    311 		u64 integral = f;
    312 		u64 fraction = (f - integral) * prec;
    313 		stream_append_u64(s, integral);
    314 		stream_append_byte(s, '.');
    315 		for (i64 i = prec / 10; i > 1; i /= 10) {
    316 			if (i > fraction)
    317 				stream_append_byte(s, '0');
    318 		}
    319 		stream_append_u64(s, fraction);
    320 	}
    321 }
    322 
    323 function void
    324 stream_append_f64_e(Stream *s, f64 f)
    325 {
    326 	/* TODO: there should be a better way of doing this */
    327 	#if 0
    328 	/* NOTE: we ignore subnormal numbers for now */
    329 	union { f64 f; u64 u; } u = {.f = f};
    330 	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
    331 	f32 log_10_of_2 = 0.301f;
    332 	i32 scale       = (exponent * log_10_of_2);
    333 	/* NOTE: normalize f */
    334 	for (i32 i = ABS(scale); i > 0; i--)
    335 		f *= (scale > 0)? 0.1f : 10.0f;
    336 	#else
    337 	i32 scale = 0;
    338 	if (f != 0) {
    339 		while (f > 1) {
    340 			f *= 0.1f;
    341 			scale++;
    342 		}
    343 		while (f < 1) {
    344 			f *= 10.0f;
    345 			scale--;
    346 		}
    347 	}
    348 	#endif
    349 
    350 	i32 prec = 100;
    351 	stream_append_f64(s, f, prec);
    352 	stream_append_byte(s, 'e');
    353 	stream_append_byte(s, scale >= 0? '+' : '-');
    354 	for (i32 i = prec / 10; i > 1; i /= 10)
    355 		stream_append_byte(s, '0');
    356 	stream_append_u64(s, ABS(scale));
    357 }
    358 
    359 function void
    360 stream_append_v2(Stream *s, v2 v)
    361 {
    362 	stream_append_byte(s, '{');
    363 	stream_append_f64(s, v.x, 100);
    364 	stream_append_s8(s, s8(", "));
    365 	stream_append_f64(s, v.y, 100);
    366 	stream_append_byte(s, '}');
    367 }
    368 
    369 function Stream
    370 arena_stream(Arena a)
    371 {
    372 	Stream result = {0};
    373 	result.data   = a.beg;
    374 	result.cap    = a.end - a.beg;
    375 	return result;
    376 }
    377 
    378 function s8
    379 arena_stream_commit(Arena *a, Stream *s)
    380 {
    381 	ASSERT(s->data == a->beg);
    382 	s8 result = stream_to_s8(s);
    383 	arena_commit(a, result.len);
    384 	return result;
    385 }
    386 
    387 function s8
    388 arena_stream_commit_zero(Arena *a, Stream *s)
    389 {
    390 	b32 error = s->errors || s->widx == s->cap;
    391 	if (!error)
    392 		s->data[s->widx] = 0;
    393 	s8 result = stream_to_s8(s);
    394 	arena_commit(a, result.len + 1);
    395 	return result;
    396 }
    397 
    398 /* NOTE(rnp): FNV-1a hash */
    399 function u64
    400 s8_hash(s8 v)
    401 {
    402 	u64 h = 0x3243f6a8885a308d; /* digits of pi */
    403 	for (; v.len; v.len--) {
    404 		h ^= v.data[v.len - 1] & 0xFF;
    405 		h *= 1111111111111111111; /* random prime */
    406 	}
    407 	return h;
    408 }
    409 
    410 function s8
    411 c_str_to_s8(char *cstr)
    412 {
    413 	s8 result = {.data = (u8 *)cstr};
    414 	if (cstr) { while (*cstr) { result.len++; cstr++; } }
    415 	return result;
    416 }
    417 
    418 /* NOTE(rnp): returns < 0 if byte is not found */
    419 function iz
    420 s8_scan_backwards(s8 s, u8 byte)
    421 {
    422 	iz result = s.len;
    423 	while (result && s.data[result - 1] != byte) result--;
    424 	result--;
    425 	return result;
    426 }
    427 
    428 function s8
    429 s8_cut_head(s8 s, iz cut)
    430 {
    431 	s8 result = s;
    432 	if (cut > 0) {
    433 		result.data += cut;
    434 		result.len  -= cut;
    435 	}
    436 	return result;
    437 }
    438 
    439 function s8
    440 s8_alloc(Arena *a, iz len)
    441 {
    442 	s8 result = {.data = push_array(a, u8, len), .len = len};
    443 	return result;
    444 }
    445 
    446 function s8
    447 s16_to_s8(Arena *a, s16 in)
    448 {
    449 	s8 result = s8("");
    450 	if (in.len) {
    451 		iz commit = in.len * 4;
    452 		iz length = 0;
    453 		u8 *data = arena_commit(a, commit + 1);
    454 		u16 *beg = in.data;
    455 		u16 *end = in.data + in.len;
    456 		while (beg < end) {
    457 			UnicodeDecode decode = utf16_decode(beg, end - beg);
    458 			length += utf8_encode(data + length, decode.cp);
    459 			beg    += decode.consumed;
    460 		}
    461 		data[length] = 0;
    462 		result = (s8){.len = length, .data = data};
    463 		arena_pop(a, commit - length);
    464 	}
    465 	return result;
    466 }
    467 
    468 function s16
    469 s8_to_s16(Arena *a, s8 in)
    470 {
    471 	s16 result = {0};
    472 	if (in.len) {
    473 		iz required = 2 * in.len + 1;
    474 		u16 *data   = push_array(a, u16, required);
    475 		iz length   = 0;
    476 		/* TODO(rnp): utf8_decode */
    477 		for (iz i = 0; i < in.len; i++) {
    478 			u32 cp  = in.data[i];
    479 			length += utf16_encode(data + length, cp);
    480 		}
    481 		result = (s16){.len = length, .data = data};
    482 		arena_pop(a, required - length);
    483 	}
    484 	return result;
    485 }
    486 
    487 function s8
    488 push_s8(Arena *a, s8 str)
    489 {
    490 	s8 result = s8_alloc(a, str.len);
    491 	mem_copy(result.data, str.data, result.len);
    492 	return result;
    493 }
    494 
    495 function s8
    496 push_s8_zero(Arena *a, s8 str)
    497 {
    498 	s8 result   = s8_alloc(a, str.len + 1);
    499 	result.len -= 1;
    500 	mem_copy(result.data, str.data, result.len);
    501 	return result;
    502 }
    503 
    504 function u32
    505 round_down_power_of_2(u32 a)
    506 {
    507 	u32 result = 0x80000000UL >> clz_u32(a);
    508 	return result;
    509 }
    510 
    511 function b32
    512 uv2_equal(uv2 a, uv2 b)
    513 {
    514 	return a.x == b.x && a.y == b.y;
    515 }
    516 
    517 function b32
    518 uv3_equal(uv3 a, uv3 b)
    519 {
    520 	return a.x == b.x && a.y == b.y && a.z == b.z;
    521 }
    522 
    523 function v3
    524 cross(v3 a, v3 b)
    525 {
    526 	v3 result = {
    527 		.x = a.y * b.z - a.z * b.y,
    528 		.y = a.z * b.x - a.x * b.z,
    529 		.z = a.x * b.y - a.y * b.x,
    530 	};
    531 	return result;
    532 }
    533 
    534 function v3
    535 sub_v3(v3 a, v3 b)
    536 {
    537 	v3 result = {
    538 		.x = a.x - b.x,
    539 		.y = a.y - b.y,
    540 		.z = a.z - b.z,
    541 	};
    542 	return result;
    543 }
    544 
    545 function f32
    546 length_v3(v3 a)
    547 {
    548 	f32 result = a.x * a.x + a.y * a.y + a.z * a.z;
    549 	return result;
    550 }
    551 
    552 function v3
    553 normalize_v3(v3 a)
    554 {
    555 	f32 length = length_v3(a);
    556 	v3 result = {.x = a.x / length, .y = a.y / length, .z = a.z / length};
    557 	return result;
    558 }
    559 
    560 function v2
    561 clamp_v2_rect(v2 v, Rect r)
    562 {
    563 	v2 result = v;
    564 	result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x);
    565 	result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y);
    566 	return result;
    567 }
    568 
    569 function v2
    570 add_v2(v2 a, v2 b)
    571 {
    572 	v2 result = {
    573 		.x = a.x + b.x,
    574 		.y = a.y + b.y,
    575 	};
    576 	return result;
    577 }
    578 
    579 function v2
    580 sub_v2(v2 a, v2 b)
    581 {
    582 	v2 result = {
    583 		.x = a.x - b.x,
    584 		.y = a.y - b.y,
    585 	};
    586 	return result;
    587 }
    588 
    589 function v2
    590 scale_v2(v2 a, f32 scale)
    591 {
    592 	v2 result = {
    593 		.x = a.x * scale,
    594 		.y = a.y * scale,
    595 	};
    596 	return result;
    597 }
    598 
    599 function v2
    600 mul_v2(v2 a, v2 b)
    601 {
    602 	v2 result = {
    603 		.x = a.x * b.x,
    604 		.y = a.y * b.y,
    605 	};
    606 	return result;
    607 }
    608 
    609 function v2
    610 div_v2(v2 a, v2 b)
    611 {
    612 	v2 result;
    613 	result.x = a.x / b.x;
    614 	result.y = a.y / b.y;
    615 	return result;
    616 }
    617 
    618 
    619 function v2
    620 floor_v2(v2 a)
    621 {
    622 	v2 result;
    623 	result.x = (u32)a.x;
    624 	result.y = (u32)a.y;
    625 	return result;
    626 }
    627 
    628 function f32
    629 magnitude_v2(v2 a)
    630 {
    631 	f32 result = sqrt_f32(a.x * a.x + a.y * a.y);
    632 	return result;
    633 }
    634 
    635 function uv4
    636 uv4_from_u32_array(u32 v[4])
    637 {
    638 	uv4 result;
    639 	result.E[0] = v[0];
    640 	result.E[1] = v[1];
    641 	result.E[2] = v[2];
    642 	result.E[3] = v[3];
    643 	return result;
    644 }
    645 
    646 function b32
    647 uv4_equal(uv4 a, uv4 b)
    648 {
    649 	return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
    650 }
    651 
    652 function v4
    653 v4_from_f32_array(f32 v[4])
    654 {
    655 	v4 result;
    656 	result.E[0] = v[0];
    657 	result.E[1] = v[1];
    658 	result.E[2] = v[2];
    659 	result.E[3] = v[3];
    660 	return result;
    661 }
    662 
    663 function v4
    664 sub_v4(v4 a, v4 b)
    665 {
    666 	v4 result;
    667 	result.x = a.x - b.x;
    668 	result.y = a.y - b.y;
    669 	result.z = a.z - b.z;
    670 	result.w = a.w - b.w;
    671 	return result;
    672 }
    673 
    674 function void
    675 split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right)
    676 {
    677 	if (left) {
    678 		left->pos    = rect.pos;
    679 		left->size.h = rect.size.h;
    680 		left->size.w = rect.size.w * fraction;
    681 	}
    682 	if (right) {
    683 		right->pos    = rect.pos;
    684 		right->pos.x += rect.size.w * fraction;
    685 		right->size.h = rect.size.h;
    686 		right->size.w = rect.size.w * (1.0f - fraction);
    687 	}
    688 }
    689 
    690 function void
    691 split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot)
    692 {
    693 	if (top) {
    694 		top->pos    = rect.pos;
    695 		top->size.w = rect.size.w;
    696 		top->size.h = rect.size.h * fraction;
    697 	}
    698 	if (bot) {
    699 		bot->pos    = rect.pos;
    700 		bot->pos.y += rect.size.h * fraction;
    701 		bot->size.w = rect.size.w;
    702 		bot->size.h = rect.size.h * (1.0f - fraction);
    703 	}
    704 }
    705 
    706 function void
    707 cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right)
    708 {
    709 	at = MIN(at, rect.size.w);
    710 	if (left) {
    711 		*left = rect;
    712 		left->size.w = at;
    713 	}
    714 	if (right) {
    715 		*right = rect;
    716 		right->pos.x  += at;
    717 		right->size.w -= at;
    718 	}
    719 }
    720 
    721 function void
    722 cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot)
    723 {
    724 	at = MIN(at, rect.size.h);
    725 	if (top) {
    726 		*top = rect;
    727 		top->size.h = at;
    728 	}
    729 	if (bot) {
    730 		*bot = rect;
    731 		bot->pos.y  += at;
    732 		bot->size.h -= at;
    733 	}
    734 }
    735 
    736 function f64
    737 parse_f64(s8 s)
    738 {
    739 	f64 integral = 0, fractional = 0, sign = 1;
    740 
    741 	if (s.len && *s.data == '-') {
    742 		sign = -1;
    743 		s.data++;
    744 		s.len--;
    745 	}
    746 
    747 	while (s.len && *s.data != '.') {
    748 		integral *= 10;
    749 		integral += *s.data - '0';
    750 		s.data++;
    751 		s.len--;
    752 	}
    753 
    754 	if (*s.data == '.') { s.data++; s.len--; }
    755 
    756 	while (s.len) {
    757 		ASSERT(s.data[s.len - 1] != '.');
    758 		fractional /= 10;
    759 		fractional += (f64)(s.data[--s.len] - '0') / 10.0;
    760 	}
    761 	f64 result = sign * (integral + fractional);
    762 	return result;
    763 }
    764 
    765 function FileWatchDirectory *
    766 lookup_file_watch_directory(FileWatchContext *ctx, u64 hash)
    767 {
    768 	FileWatchDirectory *result = 0;
    769 	for (u32 i = 0; i < ctx->count; i++) {
    770 		FileWatchDirectory *test = ctx->data + i;
    771 		if (test->hash == hash) {
    772 			result = test;
    773 			break;
    774 		}
    775 	}
    776 	return result;
    777 }
    778 
    779 function void
    780 fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, uv2 b_dim)
    781 {
    782 	f32x4 vscale = dup_f32x4(scale);
    783 	for (u32 i = 0; i < b_dim.y; i++) {
    784 		for (u32 j = 0; j < b_dim.x; j += 4, b += 4) {
    785 			f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b));
    786 			store_i32x4(cvt_f32x4_i32x4(mul_f32x4(vscale, vb)), out + j);
    787 		}
    788 		out += out_stride;
    789 	}
    790 }
    791 
    792 /* NOTE: this won't check for valid space/etc and assumes row major order */
    793 function void
    794 kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim)
    795 {
    796 	uv2 out_dim = {.x = a_dim.x * b_dim.x, .y = a_dim.y * b_dim.y};
    797 	ASSERT(out_dim.y % 4 == 0);
    798 	for (u32 i = 0; i < a_dim.y; i++) {
    799 		i32 *vout = out;
    800 		for (u32 j = 0; j < a_dim.x; j++, a++) {
    801 			fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim);
    802 			vout += b_dim.y;
    803 		}
    804 		out += out_dim.y * b_dim.x;
    805 	}
    806 }
    807 
    808 /* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
    809 function i32 *
    810 make_hadamard_transpose(Arena *a, u32 dim)
    811 {
    812 	read_only local_persist	i32 hadamard_12_12_transpose[] = {
    813 		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    814 		1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,
    815 		1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,
    816 		1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
    817 		1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,
    818 		1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,
    819 		1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,
    820 		1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1,
    821 		1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1,
    822 		1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
    823 		1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,
    824 		1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1,
    825 	};
    826 
    827 	i32 *result = 0;
    828 
    829 	b32 power_of_2     = ISPOWEROF2(dim);
    830 	b32 multiple_of_12 = dim % 12 == 0;
    831 	b32 multiple_of_20 = dim % 20 == 0;
    832 	iz elements        = dim * dim;
    833 
    834 	u32 base_dim = 0;
    835 	if (power_of_2) {
    836 		base_dim  = dim;
    837 	} else if (multiple_of_20) {
    838 		/* TODO(rnp): hadamard 20 */
    839 		//base_dim  = 20;
    840 		//dim      /= 20;
    841 	} else if (multiple_of_12 && dim != 36) {
    842 		base_dim  = 12;
    843 		dim      /= 12;
    844 	}
    845 
    846 	if (ISPOWEROF2(dim) && base_dim && arena_capacity(a, i32) >= elements * (1 + (dim != base_dim))) {
    847 		result = push_array(a, i32, elements);
    848 
    849 		Arena tmp = *a;
    850 		i32 *m = dim == base_dim ? result : push_array(&tmp, i32, elements);
    851 
    852 		#define IND(i, j) ((i) * dim + (j))
    853 		m[0] = 1;
    854 		for (u32 k = 1; k < dim; k *= 2) {
    855 			for (u32 i = 0; i < k; i++) {
    856 				for (u32 j = 0; j < k; j++) {
    857 					i32 val = m[IND(i, j)];
    858 					m[IND(i + k, j)]     =  val;
    859 					m[IND(i, j + k)]     =  val;
    860 					m[IND(i + k, j + k)] = -val;
    861 				}
    862 			}
    863 		}
    864 		#undef IND
    865 
    866 		if (dim != base_dim) {
    867 			ASSERT(base_dim == 12);
    868 			i32 *m2 = hadamard_12_12_transpose;
    869 			kronecker_product(result,
    870 			                  m,  (uv2){.x = dim,      .y = dim},
    871 			                  m2, (uv2){.x = base_dim, .y = base_dim});
    872 		}
    873 	}
    874 
    875 	return result;
    876 }