ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

util.c (19503B)


      1 /* See LICENSE for license details. */
      2 #define zero_struct(s) mem_clear(s, 0, sizeof(*s))
      3 function void *
      4 mem_clear(void *restrict p_, u8 c, iz size)
      5 {
      6 	u8 *p = p_;
      7 	while (size > 0) p[--size] = c;
      8 	return p;
      9 }
     10 
     11 function void
     12 mem_copy(void *restrict dest, void *restrict src, uz n)
     13 {
     14 	u8 *s = src, *d = dest;
     15 	for (; n; n--) *d++ = *s++;
     16 }
     17 
     18 function void
     19 mem_move(u8 *dest, u8 *src, iz n)
     20 {
     21 	if (dest < src) mem_copy(dest, src, n);
     22 	else            while (n) { n--; dest[n] = src[n]; }
     23 }
     24 
     25 
     26 function u8 *
     27 arena_commit(Arena *a, iz size)
     28 {
     29 	ASSERT(a->end - a->beg >= size);
     30 	u8 *result = a->beg;
     31 	a->beg += size;
     32 	return result;
     33 }
     34 
     35 function void
     36 arena_pop(Arena *a, iz length)
     37 {
     38 	a->beg -= length;
     39 }
     40 
#define push_array(a, t, n) (t *)arena_alloc(a, sizeof(t), _Alignof(t), n)
#define push_struct(a, t)   (t *)arena_alloc(a, sizeof(t), _Alignof(t), 1)
/* allocate count elements of len bytes each, aligned to align, from the
 * arena. memory is zeroed. asserts on exhaustion rather than failing. */
function void *
arena_alloc(Arena *a, iz len, iz align, iz count)
{
	/* NOTE: special case 0 arena */
	if (a->beg == 0)
		return 0;

	/* bytes needed to bring the head up to the requested alignment
	 * (align is assumed to be a power of two) */
	iz padding   = -(uintptr_t)a->beg & (align - 1);
	iz available = a->end - a->beg - padding;
	/* division form avoids overflow in count * len during the check */
	if (available < 0 || count > available / len)
		ASSERT(0 && "arena OOM\n");
	void *p = a->beg + padding;
	a->beg += padding + count * len;
	/* TODO: Performance? */
	return mem_clear(p, 0, count * len);
}
     59 
     60 #define arena_capacity(a, t) arena_capacity_(a, sizeof(t), _Alignof(t))
     61 function iz
     62 arena_capacity_(Arena *a, iz size, iz alignment)
     63 {
     64 	iz padding   = -(uintptr_t)a->beg & (alignment - 1);
     65 	iz available = a->end - a->beg - padding;
     66 	iz result    = available / size;
     67 	return result;
     68 }
     69 
enum { DA_INITIAL_CAP = 4 };
/* ensure dynamic array s has room for n more elements, allocating from a */
#define da_reserve(a, s, n) \
  (s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \
                          _Alignof(typeof(*(s)->data)), sizeof(*(s)->data))

/* pointer to the next free slot of s, growing the array first when full */
#define da_push(a, s) \
  ((s)->count == (s)->capacity  \
    ? da_reserve(a, s, 1),      \
      (s)->data + (s)->count++  \
    : (s)->data + (s)->count++)

/* grows the backing store to at least `needed` elements (doubling from
 * DA_INITIAL_CAP) and returns the (possibly relocated) data pointer */
function void *
da_reserve_(Arena *a, void *data, iz *capacity, iz needed, iz align, iz size)
{
	iz cap = *capacity;

	/* NOTE(rnp): handle both 0 initialized DAs and DAs that need to be moved (they started
	 * on the stack or someone allocated something in the middle of the arena during usage) */
	if (!data || a->beg != (u8 *)data + cap * size) {
		void *copy = arena_alloc(a, size, align, cap);
		if (data) mem_copy(copy, data, cap * size);
		data = copy;
	}

	if (!cap) cap = DA_INITIAL_CAP;
	while (cap < needed) cap *= 2;
	/* the array now sits at the arena head, so allocating the extra
	 * elements extends it in place */
	arena_alloc(a, size, align, cap - *capacity);
	*capacity = cap;
	return data;
}
    100 
    101 function Arena
    102 sub_arena(Arena *a, iz len, iz align)
    103 {
    104 	Arena result = {0};
    105 
    106 	iz padding = -(uintptr_t)a->beg & (align - 1);
    107 	result.beg   = a->beg + padding;
    108 	result.end   = result.beg + len;
    109 	arena_commit(a, len + padding);
    110 
    111 	return result;
    112 }
    113 
    114 function TempArena
    115 begin_temp_arena(Arena *a)
    116 {
    117 	TempArena result = {.arena = a, .old_beg = a->beg};
    118 	return result;
    119 }
    120 
    121 function void
    122 end_temp_arena(TempArena ta)
    123 {
    124 	Arena *a = ta.arena;
    125 	if (a) {
    126 		assert(a->beg >= ta.old_beg);
    127 		a->beg = ta.old_beg;
    128 	}
    129 }
    130 
    131 function u32
    132 utf8_encode(u8 *out, u32 cp)
    133 {
    134 	u32 result = 1;
    135 	if (cp <= 0x7F) {
    136 		out[0] = cp & 0x7F;
    137 	} else if (cp <= 0x7FF) {
    138 		result = 2;
    139 		out[0] = ((cp >>  6) & 0x1F) | 0xC0;
    140 		out[1] = ((cp >>  0) & 0x3F) | 0x80;
    141 	} else if (cp <= 0xFFFF) {
    142 		result = 3;
    143 		out[0] = ((cp >> 12) & 0x0F) | 0xE0;
    144 		out[1] = ((cp >>  6) & 0x3F) | 0x80;
    145 		out[2] = ((cp >>  0) & 0x3F) | 0x80;
    146 	} else if (cp <= 0x10FFFF) {
    147 		result = 4;
    148 		out[0] = ((cp >> 18) & 0x07) | 0xF0;
    149 		out[1] = ((cp >> 12) & 0x3F) | 0x80;
    150 		out[2] = ((cp >>  6) & 0x3F) | 0x80;
    151 		out[3] = ((cp >>  0) & 0x3F) | 0x80;
    152 	} else {
    153 		out[0] = '?';
    154 	}
    155 	return result;
    156 }
    157 
    158 function UnicodeDecode
    159 utf16_decode(u16 *data, iz length)
    160 {
    161 	UnicodeDecode result = {.cp = U32_MAX};
    162 	if (length) {
    163 		result.consumed = 1;
    164 		result.cp = data[0];
    165 		if (length > 1 && BETWEEN(data[0], 0xD800, 0xDBFF)
    166 		               && BETWEEN(data[1], 0xDC00, 0xDFFF))
    167 		{
    168 			result.consumed = 2;
    169 			result.cp = ((data[0] - 0xD800) << 10) | ((data[1] - 0xDC00) + 0x10000);
    170 		}
    171 	}
    172 	return result;
    173 }
    174 
    175 function u32
    176 utf16_encode(u16 *out, u32 cp)
    177 {
    178 	u32 result = 1;
    179 	if (cp == U32_MAX) {
    180 		out[0] = '?';
    181 	} else if (cp < 0x10000) {
    182 		out[0] = cp;
    183 	} else {
    184 		u32 value = cp - 0x10000;
    185 		out[0] = 0xD800 + (value >> 10u);
    186 		out[1] = 0xDC00 + (value & 0x3FFu);
    187 		result = 2;
    188 	}
    189 	return result;
    190 }
    191 
    192 function Stream
    193 stream_alloc(Arena *a, iz cap)
    194 {
    195 	Stream result = {.cap = cap};
    196 	result.data = push_array(a, u8, cap);
    197 	return result;
    198 }
    199 
    200 function s8
    201 stream_to_s8(Stream *s)
    202 {
    203 	s8 result = s8("");
    204 	if (!s->errors) result = (s8){.len = s->widx, .data = s->data};
    205 	return result;
    206 }
    207 
    208 function void
    209 stream_reset(Stream *s, iz index)
    210 {
    211 	s->errors = s->cap <= index;
    212 	if (!s->errors)
    213 		s->widx = index;
    214 }
    215 
    216 function void
    217 stream_commit(Stream *s, iz count)
    218 {
    219 	s->errors |= !BETWEEN(s->widx + count, 0, s->cap);
    220 	if (!s->errors)
    221 		s->widx += count;
    222 }
    223 
    224 function void
    225 stream_append(Stream *s, void *data, iz count)
    226 {
    227 	s->errors |= (s->cap - s->widx) < count;
    228 	if (!s->errors) {
    229 		mem_copy(s->data + s->widx, data, count);
    230 		s->widx += count;
    231 	}
    232 }
    233 
    234 function void
    235 stream_append_byte(Stream *s, u8 b)
    236 {
    237 	stream_append(s, &b, 1);
    238 }
    239 
    240 function void
    241 stream_pad(Stream *s, u8 b, i32 n)
    242 {
    243 	while (n > 0) stream_append_byte(s, b), n--;
    244 }
    245 
    246 function void
    247 stream_append_s8(Stream *s, s8 str)
    248 {
    249 	stream_append(s, str.data, str.len);
    250 }
    251 
/* variadic helper: appends each s8 argument in order */
#define stream_append_s8s(s, ...) stream_append_s8s_(s, arg_list(s8, ##__VA_ARGS__))
/* append count s8s back to back; overflow behaviour matches stream_append() */
function void
stream_append_s8s_(Stream *s, s8 *strs, iz count)
{
	for (iz i = 0; i < count; i++)
		stream_append(s, strs[i].data, strs[i].len);
}
    259 
    260 function void
    261 stream_append_u64(Stream *s, u64 n)
    262 {
    263 	u8 tmp[64];
    264 	u8 *end = tmp + sizeof(tmp);
    265 	u8 *beg = end;
    266 	do { *--beg = '0' + (n % 10); } while (n /= 10);
    267 	stream_append(s, beg, end - beg);
    268 }
    269 
    270 function void
    271 stream_append_hex_u64(Stream *s, u64 n)
    272 {
    273 	if (!s->errors) {
    274 		u8  buf[16];
    275 		u8 *end = buf + sizeof(buf);
    276 		u8 *beg = end;
    277 		while (n) {
    278 			*--beg = "0123456789abcdef"[n & 0x0F];
    279 			n >>= 4;
    280 		}
    281 		while (end - beg < 2)
    282 			*--beg = '0';
    283 		stream_append(s, beg, end - beg);
    284 	}
    285 }
    286 
    287 function void
    288 stream_append_i64(Stream *s, i64 n)
    289 {
    290 	if (n < 0) {
    291 		stream_append_byte(s, '-');
    292 		n *= -1;
    293 	}
    294 	stream_append_u64(s, n);
    295 }
    296 
/* format f in fixed point. prec is a power of ten selecting the fractional
 * digit count (e.g. prec = 100 -> 2 digits). values whose integral part
 * exceeds i64 range print as "inf". */
function void
stream_append_f64(Stream *s, f64 f, i64 prec)
{
	if (f < 0) {
		stream_append_byte(s, '-');
		f *= -1;
	}

	/* NOTE: round last digit */
	f += 0.5f / prec;

	if (f >= (f64)(-1UL >> 1)) {
		stream_append_s8(s, s8("inf"));
	} else {
		u64 integral = f;
		u64 fraction = (f - integral) * prec;
		stream_append_u64(s, integral);
		stream_append_byte(s, '.');
		/* zero pad the fraction out to the full digit count */
		for (i64 i = prec / 10; i > 1; i /= 10) {
			if (i > fraction)
				stream_append_byte(s, '0');
		}
		stream_append_u64(s, fraction);
	}
}
    322 
    323 function void
    324 stream_append_f64_e(Stream *s, f64 f)
    325 {
    326 	/* TODO: there should be a better way of doing this */
    327 	#if 0
    328 	/* NOTE: we ignore subnormal numbers for now */
    329 	union { f64 f; u64 u; } u = {.f = f};
    330 	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
    331 	f32 log_10_of_2 = 0.301f;
    332 	i32 scale       = (exponent * log_10_of_2);
    333 	/* NOTE: normalize f */
    334 	for (i32 i = ABS(scale); i > 0; i--)
    335 		f *= (scale > 0)? 0.1f : 10.0f;
    336 	#else
    337 	i32 scale = 0;
    338 	if (f != 0) {
    339 		while (f > 1) {
    340 			f *= 0.1f;
    341 			scale++;
    342 		}
    343 		while (f < 1) {
    344 			f *= 10.0f;
    345 			scale--;
    346 		}
    347 	}
    348 	#endif
    349 
    350 	i32 prec = 100;
    351 	stream_append_f64(s, f, prec);
    352 	stream_append_byte(s, 'e');
    353 	stream_append_byte(s, scale >= 0? '+' : '-');
    354 	for (i32 i = prec / 10; i > 1; i /= 10)
    355 		stream_append_byte(s, '0');
    356 	stream_append_u64(s, ABS(scale));
    357 }
    358 
    359 function void
    360 stream_append_v2(Stream *s, v2 v)
    361 {
    362 	stream_append_byte(s, '{');
    363 	stream_append_f64(s, v.x, 100);
    364 	stream_append_s8(s, s8(", "));
    365 	stream_append_f64(s, v.y, 100);
    366 	stream_append_byte(s, '}');
    367 }
    368 
    369 function Stream
    370 arena_stream(Arena a)
    371 {
    372 	Stream result = {0};
    373 	result.data   = a.beg;
    374 	result.cap    = a.end - a.beg;
    375 	return result;
    376 }
    377 
    378 function s8
    379 arena_stream_commit(Arena *a, Stream *s)
    380 {
    381 	ASSERT(s->data == a->beg);
    382 	s8 result = stream_to_s8(s);
    383 	arena_commit(a, result.len);
    384 	return result;
    385 }
    386 
    387 function s8
    388 arena_stream_commit_zero(Arena *a, Stream *s)
    389 {
    390 	b32 error = s->errors || s->widx == s->cap;
    391 	if (!error)
    392 		s->data[s->widx] = 0;
    393 	s8 result = stream_to_s8(s);
    394 	arena_commit(a, result.len + 1);
    395 	return result;
    396 }
    397 
    398 /* NOTE(rnp): FNV-1a hash */
    399 function u64
    400 s8_hash(s8 v)
    401 {
    402 	u64 h = 0x3243f6a8885a308d; /* digits of pi */
    403 	for (; v.len; v.len--) {
    404 		h ^= v.data[v.len - 1] & 0xFF;
    405 		h *= 1111111111111111111; /* random prime */
    406 	}
    407 	return h;
    408 }
    409 
    410 function s8
    411 c_str_to_s8(char *cstr)
    412 {
    413 	s8 result = {.data = (u8 *)cstr};
    414 	if (cstr) { while (*cstr) { result.len++; cstr++; } }
    415 	return result;
    416 }
    417 
    418 /* NOTE(rnp): returns < 0 if byte is not found */
    419 function iz
    420 s8_scan_backwards(s8 s, u8 byte)
    421 {
    422 	iz result = s.len;
    423 	while (result && s.data[result - 1] != byte) result--;
    424 	result--;
    425 	return result;
    426 }
    427 
    428 function s8
    429 s8_cut_head(s8 s, iz cut)
    430 {
    431 	s8 result = s;
    432 	if (cut > 0) {
    433 		result.data += cut;
    434 		result.len  -= cut;
    435 	}
    436 	return result;
    437 }
    438 
    439 function s8
    440 s8_alloc(Arena *a, iz len)
    441 {
    442 	s8 result = {.data = push_array(a, u8, len), .len = len};
    443 	return result;
    444 }
    445 
/* convert a UTF-16 string to UTF-8, allocated from the arena. the result is
 * NUL terminated but the terminator is not counted in len. */
function s8
s16_to_s8(Arena *a, s16 in)
{
	s8 result = s8("");
	if (in.len) {
		/* worst case commit: 4 output bytes per u16 unit plus a NUL;
		 * the unused tail is returned to the arena once the real
		 * length is known */
		iz commit = in.len * 4;
		iz length = 0;
		u8 *data = arena_commit(a, commit + 1);
		u16 *beg = in.data;
		u16 *end = in.data + in.len;
		while (beg < end) {
			UnicodeDecode decode = utf16_decode(beg, end - beg);
			length += utf8_encode(data + length, decode.cp);
			beg    += decode.consumed;
		}
		data[length] = 0;
		result = (s8){.len = length, .data = data};
		arena_pop(a, commit - length);
	}
	return result;
}
    467 
    468 function s16
    469 s8_to_s16(Arena *a, s8 in)
    470 {
    471 	s16 result = {0};
    472 	if (in.len) {
    473 		iz required = 2 * in.len + 1;
    474 		u16 *data   = push_array(a, u16, required);
    475 		iz length   = 0;
    476 		/* TODO(rnp): utf8_decode */
    477 		for (iz i = 0; i < in.len; i++) {
    478 			u32 cp  = in.data[i];
    479 			length += utf16_encode(data + length, cp);
    480 		}
    481 		result = (s16){.len = length, .data = data};
    482 		arena_pop(a, required - length);
    483 	}
    484 	return result;
    485 }
    486 
    487 function s8
    488 push_s8(Arena *a, s8 str)
    489 {
    490 	s8 result = s8_alloc(a, str.len);
    491 	mem_copy(result.data, str.data, result.len);
    492 	return result;
    493 }
    494 
    495 function s8
    496 push_s8_zero(Arena *a, s8 str)
    497 {
    498 	s8 result   = s8_alloc(a, str.len + 1);
    499 	result.len -= 1;
    500 	mem_copy(result.data, str.data, result.len);
    501 	return result;
    502 }
    503 
/* largest power of two <= a.
 * NOTE(review): clz of 0 is typically undefined — assumes a != 0; confirm
 * callers never pass 0 */
function u32
round_down_power_of_2(u32 a)
{
	u32 result = 0x80000000UL >> clz_u32(a);
	return result;
}
    510 
/* smallest power of two >= a.
 * NOTE(review): a == 1 makes this clz of 0 (typically undefined) — assumes
 * a >= 2; confirm callers */
function u32
round_up_power_of_2(u32 a)
{
	u32 result = 0x80000000UL >> (clz_u32(a - 1) - 1);
	return result;
}
    517 
    518 function iz
    519 round_up_to(iz value, iz multiple)
    520 {
    521 	iz result = value;
    522 	if (value % multiple != 0)
    523 		result += multiple - value % multiple;
    524 	return result;
    525 }
    526 
    527 function b32
    528 uv2_equal(uv2 a, uv2 b)
    529 {
    530 	return a.x == b.x && a.y == b.y;
    531 }
    532 
    533 function b32
    534 uv3_equal(uv3 a, uv3 b)
    535 {
    536 	return a.x == b.x && a.y == b.y && a.z == b.z;
    537 }
    538 
    539 function v3
    540 cross(v3 a, v3 b)
    541 {
    542 	v3 result = {
    543 		.x = a.y * b.z - a.z * b.y,
    544 		.y = a.z * b.x - a.x * b.z,
    545 		.z = a.x * b.y - a.y * b.x,
    546 	};
    547 	return result;
    548 }
    549 
    550 function v3
    551 sub_v3(v3 a, v3 b)
    552 {
    553 	v3 result = {
    554 		.x = a.x - b.x,
    555 		.y = a.y - b.y,
    556 		.z = a.z - b.z,
    557 	};
    558 	return result;
    559 }
    560 
/* NOTE(review): despite the name this returns the SQUARED length, dot(a, a);
 * no sqrt is applied — confirm callers expect that (normalize_v3 divides by
 * this value directly) */
function f32
length_v3(v3 a)
{
	f32 result = a.x * a.x + a.y * a.y + a.z * a.z;
	return result;
}
    567 
    568 function v3
    569 normalize_v3(v3 a)
    570 {
    571 	f32 length = length_v3(a);
    572 	v3 result = {.x = a.x / length, .y = a.y / length, .z = a.z / length};
    573 	return result;
    574 }
    575 
    576 function v2
    577 clamp_v2_rect(v2 v, Rect r)
    578 {
    579 	v2 result = v;
    580 	result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x);
    581 	result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y);
    582 	return result;
    583 }
    584 
    585 function v2
    586 add_v2(v2 a, v2 b)
    587 {
    588 	v2 result = {
    589 		.x = a.x + b.x,
    590 		.y = a.y + b.y,
    591 	};
    592 	return result;
    593 }
    594 
    595 function v2
    596 sub_v2(v2 a, v2 b)
    597 {
    598 	v2 result = {
    599 		.x = a.x - b.x,
    600 		.y = a.y - b.y,
    601 	};
    602 	return result;
    603 }
    604 
    605 function v2
    606 scale_v2(v2 a, f32 scale)
    607 {
    608 	v2 result = {
    609 		.x = a.x * scale,
    610 		.y = a.y * scale,
    611 	};
    612 	return result;
    613 }
    614 
    615 function v2
    616 mul_v2(v2 a, v2 b)
    617 {
    618 	v2 result = {
    619 		.x = a.x * b.x,
    620 		.y = a.y * b.y,
    621 	};
    622 	return result;
    623 }
    624 
    625 function v2
    626 div_v2(v2 a, v2 b)
    627 {
    628 	v2 result;
    629 	result.x = a.x / b.x;
    630 	result.y = a.y / b.y;
    631 	return result;
    632 }
    633 
    634 
/* NOTE(review): the i32 cast truncates toward zero, which differs from
 * floor() for negative components — confirm inputs are non-negative */
function v2
floor_v2(v2 a)
{
	v2 result;
	result.x = (i32)a.x;
	result.y = (i32)a.y;
	return result;
}
    643 
    644 function f32
    645 magnitude_v2(v2 a)
    646 {
    647 	f32 result = sqrt_f32(a.x * a.x + a.y * a.y);
    648 	return result;
    649 }
    650 
    651 function uv4
    652 uv4_from_u32_array(u32 v[4])
    653 {
    654 	uv4 result;
    655 	result.E[0] = v[0];
    656 	result.E[1] = v[1];
    657 	result.E[2] = v[2];
    658 	result.E[3] = v[3];
    659 	return result;
    660 }
    661 
    662 function b32
    663 uv4_equal(uv4 a, uv4 b)
    664 {
    665 	return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
    666 }
    667 
    668 function v4
    669 v4_from_f32_array(f32 v[4])
    670 {
    671 	v4 result;
    672 	result.E[0] = v[0];
    673 	result.E[1] = v[1];
    674 	result.E[2] = v[2];
    675 	result.E[3] = v[3];
    676 	return result;
    677 }
    678 
    679 function v4
    680 sub_v4(v4 a, v4 b)
    681 {
    682 	v4 result;
    683 	result.x = a.x - b.x;
    684 	result.y = a.y - b.y;
    685 	result.z = a.z - b.z;
    686 	result.w = a.w - b.w;
    687 	return result;
    688 }
    689 
    690 function void
    691 split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right)
    692 {
    693 	if (left) {
    694 		left->pos    = rect.pos;
    695 		left->size.h = rect.size.h;
    696 		left->size.w = rect.size.w * fraction;
    697 	}
    698 	if (right) {
    699 		right->pos    = rect.pos;
    700 		right->pos.x += rect.size.w * fraction;
    701 		right->size.h = rect.size.h;
    702 		right->size.w = rect.size.w * (1.0f - fraction);
    703 	}
    704 }
    705 
    706 function void
    707 split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot)
    708 {
    709 	if (top) {
    710 		top->pos    = rect.pos;
    711 		top->size.w = rect.size.w;
    712 		top->size.h = rect.size.h * fraction;
    713 	}
    714 	if (bot) {
    715 		bot->pos    = rect.pos;
    716 		bot->pos.y += rect.size.h * fraction;
    717 		bot->size.w = rect.size.w;
    718 		bot->size.h = rect.size.h * (1.0f - fraction);
    719 	}
    720 }
    721 
    722 function void
    723 cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right)
    724 {
    725 	at = MIN(at, rect.size.w);
    726 	if (left) {
    727 		*left = rect;
    728 		left->size.w = at;
    729 	}
    730 	if (right) {
    731 		*right = rect;
    732 		right->pos.x  += at;
    733 		right->size.w -= at;
    734 	}
    735 }
    736 
    737 function void
    738 cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot)
    739 {
    740 	at = MIN(at, rect.size.h);
    741 	if (top) {
    742 		*top = rect;
    743 		top->size.h = at;
    744 	}
    745 	if (bot) {
    746 		*bot = rect;
    747 		bot->pos.y  += at;
    748 		bot->size.h -= at;
    749 	}
    750 }
    751 
    752 function f64
    753 parse_f64(s8 s)
    754 {
    755 	f64 integral = 0, fractional = 0, sign = 1;
    756 
    757 	if (s.len && *s.data == '-') {
    758 		sign = -1;
    759 		s.data++;
    760 		s.len--;
    761 	}
    762 
    763 	while (s.len && *s.data != '.') {
    764 		integral *= 10;
    765 		integral += *s.data - '0';
    766 		s.data++;
    767 		s.len--;
    768 	}
    769 
    770 	if (*s.data == '.') { s.data++; s.len--; }
    771 
    772 	while (s.len) {
    773 		ASSERT(s.data[s.len - 1] != '.');
    774 		fractional /= 10;
    775 		fractional += (f64)(s.data[--s.len] - '0') / 10.0;
    776 	}
    777 	f64 result = sign * (integral + fractional);
    778 	return result;
    779 }
    780 
    781 function FileWatchDirectory *
    782 lookup_file_watch_directory(FileWatchContext *ctx, u64 hash)
    783 {
    784 	FileWatchDirectory *result = 0;
    785 	for (u32 i = 0; i < ctx->count; i++) {
    786 		FileWatchDirectory *test = ctx->data + i;
    787 		if (test->hash == hash) {
    788 			result = test;
    789 			break;
    790 		}
    791 	}
    792 	return result;
    793 }
    794 
/* store scale * b (a b_dim sized matrix) into out, whose rows are
 * out_stride i32s apart. b_dim.x is assumed to be a multiple of 4 so whole
 * f32x4 lanes can be processed — TODO confirm all callers guarantee that */
function void
fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, uv2 b_dim)
{
	f32x4 vscale = dup_f32x4(scale);
	for (u32 i = 0; i < b_dim.y; i++) {
		for (u32 j = 0; j < b_dim.x; j += 4, b += 4) {
			/* i32 -> f32 -> multiply -> i32 keeps the SIMD path;
			 * presumably the values are small enough that the float
			 * round trip is exact */
			f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b));
			store_i32x4(cvt_f32x4_i32x4(mul_f32x4(vscale, vb)), out + j);
		}
		out += out_stride;
	}
}
    807 
/* NOTE: this won't check for valid space/etc and assumes row major order */
/* out = kron(a, b): every element of a scales a full copy of b into the
 * corresponding block of out */
function void
kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim)
{
	uv2 out_dim = {.x = a_dim.x * b_dim.x, .y = a_dim.y * b_dim.y};
	/* the SIMD fill helper processes 4 elements at a time */
	ASSERT(out_dim.y % 4 == 0);
	for (u32 i = 0; i < a_dim.y; i++) {
		i32 *vout = out;
		for (u32 j = 0; j < a_dim.x; j++, a++) {
			/* fill the b_dim sized block scaled by a[i][j] */
			fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim);
			vout += b_dim.y;
		}
		out += out_dim.y * b_dim.x;
	}
}
    823 
/* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
/* build the transpose of a dim x dim Hadamard matrix in the arena. returns 0
 * when dim is unsupported or the arena lacks space. supported dims are
 * powers of two, optionally times a base factor of 12 or 20 (whose known
 * Hadamard matrices are hard coded below); the power-of-two part is grown
 * with the Sylvester construction and combined with the base matrix via a
 * Kronecker product. */
function i32 *
make_hadamard_transpose(Arena *a, u32 dim)
{
	read_only local_persist	i32 hadamard_12_12_transpose[] = {
		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
		1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,
		1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,
		1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
		1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,
		1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,
		1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,
		1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1,
		1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1,
		1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
		1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,
		1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1,
	};

	read_only local_persist i32 hadamard_20_20_transpose[] = {
		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
		1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1,
		1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1,
		1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,
		1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,
		1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1,
		1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1,
		1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1,
		1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1,
		1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,
		1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1,
		1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,
		1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1,
		1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,
		1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,
		1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,
		1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,
		1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1,
		1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1,
		1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,
	};

	i32 *result = 0;

	b32 power_of_2     = ISPOWEROF2(dim);
	b32 multiple_of_12 = dim % 12 == 0;
	b32 multiple_of_20 = dim % 20 == 0;
	iz elements        = dim * dim;

	/* when a base factor divides dim, reduce dim to its power-of-two
	 * cofactor and record which hard coded matrix to combine with */
	u32 base_dim = 0;
	if (power_of_2) {
		base_dim  = dim;
	} else if (multiple_of_20 && ISPOWEROF2(dim / 20)) {
		base_dim  = 20;
		dim      /= 20;
	} else if (multiple_of_12 && ISPOWEROF2(dim / 12)) {
		base_dim  = 12;
		dim      /= 12;
	}

	/* need room for the result plus, in the composite case, a scratch
	 * matrix for the power-of-two factor */
	if (ISPOWEROF2(dim) && base_dim && arena_capacity(a, i32) >= elements * (1 + (dim != base_dim))) {
		result = push_array(a, i32, elements);

		Arena tmp = *a;
		i32 *m = dim == base_dim ? result : push_array(&tmp, i32, elements);

		/* Sylvester construction: H_{2k} = [H_k  H_k; H_k -H_k] */
		#define IND(i, j) ((i) * dim + (j))
		m[0] = 1;
		for (u32 k = 1; k < dim; k *= 2) {
			for (u32 i = 0; i < k; i++) {
				for (u32 j = 0; j < k; j++) {
					i32 val = m[IND(i, j)];
					m[IND(i + k, j)]     =  val;
					m[IND(i, j + k)]     =  val;
					m[IND(i + k, j + k)] = -val;
				}
			}
		}
		#undef IND

		i32 *m2 = 0;
		uv2 m2_dim;
		switch (base_dim) {
		case 12:{ m2 = hadamard_12_12_transpose; m2_dim = (uv2){{12, 12}}; }break;
		case 20:{ m2 = hadamard_20_20_transpose; m2_dim = (uv2){{20, 20}}; }break;
		}
		if (m2) kronecker_product(result, m, (uv2){{dim, dim}}, m2, m2_dim);
	}

	return result;
}