ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

util.c (19627B)


      1 /* See LICENSE for license details. */
      2 #if   COMPILER_CLANG
      3   #pragma GCC diagnostic ignored "-Winitializer-overrides"
      4 #elif COMPILER_GCC
      5   #pragma GCC diagnostic ignored "-Woverride-init"
      6 #endif
      7 
      8 #define zero_struct(s) memory_clear((s), 0, sizeof(*(s)))
      9 function void *
     10 memory_clear(void *restrict p_, u8 c, u64 size)
     11 {
     12 	u8 *p = p_;
     13 	while (size > 0) p[--size] = c;
     14 	return p;
     15 }
     16 
     17 function b32
     18 memory_equal(void *restrict left, void *restrict right, uz n)
     19 {
     20 	u8 *a = left, *b = right;
     21 	b32 result = 1;
     22 	for (; result && n; n--)
     23 		result &= *a++ == *b++;
     24 	return result;
     25 }
     26 
#define mem_copy memory_copy
/* Copy n bytes from src to dest; the regions must not overlap (restrict). */
function void
memory_copy(void *restrict dest, void *restrict src, uz n)
{
	u8 *s = src, *d = dest;
	#ifdef __AVX512BW__
	{
		/* full 64-byte vector copies, then one masked copy for the
		 * 0..63 byte tail: bzhi yields a mask with the low n bits set */
		for (; n >= 64; n -= 64, s += 64, d += 64)
			_mm512_storeu_epi8(d, _mm512_loadu_epi8(s));
		__mmask64 k = _cvtu64_mask64(_bzhi_u64(-1ULL, n));
		_mm512_mask_storeu_epi8(d, k, _mm512_maskz_loadu_epi8(k, s));
	}
	#else
		/* portable fallback: byte-at-a-time forward copy */
		for (; n; n--) *d++ = *s++;
	#endif
}
     43 
/* IMPORTANT: this function may fault if dest, src, and n are not multiples of 64 */
/* Copy n bytes with non-temporal (cache-bypassing) loads/stores where the
 * target supports them; falls back to mem_copy() otherwise.
 * NOTE(review): the AVX2 and ARM64 paths only advance 32 bytes per step,
 * so the 64-byte contract above covers the strictest (AVX-512) path. */
function void
memory_copy_non_temporal(void *restrict dest, void *restrict src, uz n)
{
	assume(((u64)dest & 63) == 0);
	assume(((u64)src  & 63) == 0);
	assume(((u64)n    & 63) == 0);
	u8 *s = src, *d = dest;

	#if defined(__AVX512BW__)
	{
		for (; n >= 64; n -= 64, s += 64, d += 64)
			_mm512_stream_si512((__m512i *)d, _mm512_stream_load_si512((__m512i *)s));
	}
	#elif defined(__AVX2__)
	{
		for (; n >= 32; n -= 32, s += 32, d += 32)
			_mm256_stream_si256((__m256i *)d, _mm256_stream_load_si256((__m256i *)s));
	}
	#elif ARCH_ARM64 && !COMPILER_MSVC
	{
		/* 32 bytes per iteration via paired non-temporal load/store;
		 * cbz skips the loop entirely when n == 0 */
		asm volatile (
			"cbz  %2, 2f\n"
			"1: ldnp q0, q1, [%1]\n"
			"subs %2, %2, #32\n"
			"add  %1, %1, #32\n"
			"stnp q0, q1, [%0]\n"
			"add  %0, %0, #32\n"
			"b.ne 1b\n"
			"2:"
			:  "+r"(d), "+r"(s), "+r"(n)
			:: "memory", "v0", "v1"
		);
	}
	#else
		mem_copy(d, s, n);
	#endif
}
     82 
     83 function void
     84 mem_move(u8 *dest, u8 *src, uz n)
     85 {
     86 	if (dest < src) mem_copy(dest, src, n);
     87 	else            while (n) { n--; dest[n] = src[n]; }
     88 }
     89 
     90 function void *
     91 memory_scan_backwards(void *memory, u8 byte, iz n)
     92 {
     93 	void *result = 0;
     94 	u8   *s      = memory;
     95 	while (n > 0) if (s[--n] == byte) { result = s + n; break; }
     96 	return result;
     97 }
     98 
     99 function Arena
    100 arena_from_memory(void *memory, u64 size)
    101 {
    102 	Arena result;
    103 	result.beg = memory;
    104 	result.end = result.beg + size;
    105 	return result;
    106 }
    107 
    108 function void *
    109 align_pointer_up(void *p, uz alignment)
    110 {
    111 	uz padding = -(u64)p & (alignment - 1);
    112 	void *result = (u8 *)p + padding;
    113 	return result;
    114 }
    115 
    116 function void *
    117 arena_aligned_start(Arena a, uz alignment)
    118 {
    119 	return align_pointer_up(a.beg, alignment);
    120 }
    121 
    122 #define arena_capacity(a, t) arena_capacity_(a, sizeof(t), alignof(t))
    123 function iz
    124 arena_capacity_(Arena *a, iz size, uz alignment)
    125 {
    126 	iz available = a->end - (u8 *)arena_aligned_start(*a, alignment);
    127 	iz result    = available / size;
    128 	return result;
    129 }
    130 
    131 function u8 *
    132 arena_commit(Arena *a, iz size)
    133 {
    134 	assert(a->end - a->beg >= size);
    135 	u8 *result = a->beg;
    136 	a->beg += size;
    137 	return result;
    138 }
    139 
    140 function void
    141 arena_pop(Arena *a, iz length)
    142 {
    143 	a->beg -= length;
    144 }
    145 
/* Option flags for arena_alloc_(). */
typedef enum {
	ArenaAllocateFlags_NoZero = 1 << 0, /* skip zeroing the new region */
} ArenaAllocateFlags;

/* Designated-initializer parameter pack for the arena_alloc() macro. */
typedef struct {
	iz size;   /* bytes per item; callers must set this explicitly */
	uz align;  /* defaults to 8 via arena_alloc() */
	iz count;  /* item count; defaults to 1 via arena_alloc() */
	ArenaAllocateFlags flags;
} ArenaAllocateInfo;

/* named-argument allocation: arena_alloc(a, .size = ..., .count = ...) */
#define arena_alloc(a, ...)         arena_alloc_(a, (ArenaAllocateInfo){.align = 8, .count = 1, ##__VA_ARGS__})
/* typed, zero-initialized array of n items of type t */
#define push_array(a, t, n)         (t *)arena_alloc(a, .size = sizeof(t), .align = alignof(t), .count = n)
/* as push_array but leaves the memory uninitialized */
#define push_array_no_zero(a, t, n) (t *)arena_alloc(a, .size = sizeof(t), .align = alignof(t), .count = n, .flags = ArenaAllocateFlags_NoZero)
#define push_struct(a, t)           push_array(a, t, 1)
#define push_struct_no_zero(a, t)   push_array_no_zero(a, t, 1)
    162 
/* Allocation backend for arena_alloc()/push_array(): bumps the arena by
 * count * size bytes at the requested alignment, zeroing the region unless
 * ArenaAllocateFlags_NoZero is set. Returns 0 for an uninitialized arena.
 * NOTE(review): info.size defaults to 0 when a caller omits it, which would
 * divide by zero in the assert below — all current users pass .size. */
function void *
arena_alloc_(Arena *a, ArenaAllocateInfo info)
{
	void *result = 0;
	if (a->beg) {
		u8 *start = arena_aligned_start(*a, info.align);
		iz available = a->end - start;
		assert((available >= 0 && info.count <= available / info.size));
		asan_unpoison_region(start, info.count * info.size);
		a->beg = start + info.count * info.size;
		result = start;
		if ((info.flags & ArenaAllocateFlags_NoZero) == 0)
			result = memory_clear(start, 0, info.count * info.size);
	}
	return result;
}
    179 
    180 function Arena
    181 sub_arena(Arena *a, iz size, uz align)
    182 {
    183 	Arena result = {.beg = arena_alloc(a, .size = size, .align = align, .flags = ArenaAllocateFlags_NoZero)};
    184 	result.end   = result.beg + size;
    185 	return result;
    186 }
    187 
    188 function Arena
    189 sub_arena_end(Arena *a, iz len, uz align)
    190 {
    191 	Arena result;
    192 	result.beg = (u8 *)((u64)(a->end - len) & ~(align - 1)),
    193 	result.end = a->end,
    194 
    195 	a->end = result.beg;
    196 	assert(a->end >= a->beg);
    197 
    198 	return result;
    199 }
    200 
    201 function TempArena
    202 begin_temp_arena(Arena *a)
    203 {
    204 	TempArena result = {.arena = a, .original_arena = *a};
    205 	return result;
    206 }
    207 
    208 function void
    209 end_temp_arena(TempArena ta)
    210 {
    211 	Arena *a = ta.arena;
    212 	if (a) {
    213 		assert(a->beg >= ta.original_arena.beg);
    214 		*a = ta.original_arena;
    215 	}
    216 }
    217 
    218 
/* capacity a dynamic array jumps to on its first growth */
enum { DA_INITIAL_CAP = 16 };

/* index of element pointer `it` within dynamic array s */
#define da_index(it, s) ((it) - (s)->data)
/* ensure room for n more items; may move (s)->data within the arena */
#define da_reserve(a, s, n) \
  (s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \
                          _Alignof(typeof(*(s)->data)), sizeof(*(s)->data))

/* bulk-append item_count items from `items` onto s */
#define da_append_count(a, s, items, item_count) do { \
	da_reserve((a), (s), (item_count));                                             \
	mem_copy((s)->data + (s)->count, (items), sizeof(*(items)) * (uz)(item_count)); \
	(s)->count += (item_count);                                                     \
} while (0)

/* append one zeroed slot (growing when full) and return a pointer to it */
#define da_push(a, s) \
  ((typeof((s)->data))memory_clear((s)->count == (s)->capacity  \
    ? da_reserve(a, s, 1),      \
      (s)->data + (s)->count++  \
    : (s)->data + (s)->count++, 0, sizeof(*(s)->data)))
    237 
/* Backend for da_reserve(): returns a data pointer with room for at least
 * `needed` items of `size` bytes each, doubling capacity starting from
 * DA_INITIAL_CAP. Relies on the array being the most recent allocation in
 * a so the in-place arena_alloc() below extends it contiguously. */
function void *
da_reserve_(Arena *a, void *data, da_count *capacity, da_count needed, u64 align, i64 size)
{
	da_count cap = *capacity;

	/* NOTE(rnp): handle both 0 initialized DAs and DAs that need to be moved (they started
	 * on the stack or someone allocated something in the middle of the arena during usage) */
	if (!data || a->beg != (u8 *)data + cap * size) {
		void *copy = arena_alloc(a, .size = size, .align = align, .count = cap);
		if (data) mem_copy(copy, data, (uz)(cap * size));
		data = copy;
	}

	if (!cap) cap = DA_INITIAL_CAP;
	while (cap < needed) cap *= 2;
	/* extend the allocation in place by the capacity delta */
	arena_alloc(a, .size = size, .align = align, .count = cap - *capacity);
	*capacity = cap;
	return data;
}
    257 
    258 function u32
    259 utf8_encode(u8 *out, u32 cp)
    260 {
    261 	u32 result = 1;
    262 	if (cp <= 0x7F) {
    263 		out[0] = cp & 0x7F;
    264 	} else if (cp <= 0x7FF) {
    265 		result = 2;
    266 		out[0] = ((cp >>  6) & 0x1F) | 0xC0;
    267 		out[1] = ((cp >>  0) & 0x3F) | 0x80;
    268 	} else if (cp <= 0xFFFF) {
    269 		result = 3;
    270 		out[0] = ((cp >> 12) & 0x0F) | 0xE0;
    271 		out[1] = ((cp >>  6) & 0x3F) | 0x80;
    272 		out[2] = ((cp >>  0) & 0x3F) | 0x80;
    273 	} else if (cp <= 0x10FFFF) {
    274 		result = 4;
    275 		out[0] = ((cp >> 18) & 0x07) | 0xF0;
    276 		out[1] = ((cp >> 12) & 0x3F) | 0x80;
    277 		out[2] = ((cp >>  6) & 0x3F) | 0x80;
    278 		out[3] = ((cp >>  0) & 0x3F) | 0x80;
    279 	} else {
    280 		out[0] = '?';
    281 	}
    282 	return result;
    283 }
    284 
    285 function UnicodeDecode
    286 utf16_decode(u16 *data, iz length)
    287 {
    288 	UnicodeDecode result = {.cp = U32_MAX};
    289 	if (length) {
    290 		result.consumed = 1;
    291 		result.cp = data[0];
    292 		if (length > 1 && BETWEEN(data[0], 0xD800u, 0xDBFFu)
    293 		               && BETWEEN(data[1], 0xDC00u, 0xDFFFu))
    294 		{
    295 			result.consumed = 2;
    296 			result.cp = ((data[0] - 0xD800u) << 10u) | ((data[1] - 0xDC00u) + 0x10000u);
    297 		}
    298 	}
    299 	return result;
    300 }
    301 
    302 function u32
    303 utf16_encode(u16 *out, u32 cp)
    304 {
    305 	u32 result = 1;
    306 	if (cp == U32_MAX) {
    307 		out[0] = '?';
    308 	} else if (cp < 0x10000u) {
    309 		out[0] = (u16)cp;
    310 	} else {
    311 		u32 value = cp - 0x10000u;
    312 		out[0] = (u16)(0xD800u + (value >> 10u));
    313 		out[1] = (u16)(0xDC00u + (value & 0x3FFu));
    314 		result = 2;
    315 	}
    316 	return result;
    317 }
    318 
    319 function Stream
    320 stream_from_buffer(u8 *buffer, u32 capacity)
    321 {
    322 	Stream result = {.data = buffer, .cap = (i32)capacity};
    323 	return result;
    324 }
    325 
    326 function Stream
    327 stream_alloc(Arena *a, i32 cap)
    328 {
    329 	Stream result = stream_from_buffer(arena_commit(a, cap), (u32)cap);
    330 	return result;
    331 }
    332 
    333 function s8
    334 stream_to_s8(Stream *s)
    335 {
    336 	s8 result = s8("");
    337 	if (!s->errors) result = (s8){.len = s->widx, .data = s->data};
    338 	return result;
    339 }
    340 
    341 function void
    342 stream_reset(Stream *s, i32 index)
    343 {
    344 	s->errors = s->cap <= index;
    345 	if (!s->errors)
    346 		s->widx = index;
    347 }
    348 
    349 function void
    350 stream_commit(Stream *s, i32 count)
    351 {
    352 	s->errors |= !BETWEEN(s->widx + count, 0, s->cap);
    353 	if (!s->errors)
    354 		s->widx += count;
    355 }
    356 
    357 function void
    358 stream_append(Stream *s, void *data, iz count)
    359 {
    360 	s->errors |= (s->cap - s->widx) < count;
    361 	if (!s->errors) {
    362 		mem_copy(s->data + s->widx, data, (uz)count);
    363 		s->widx += (i32)count;
    364 	}
    365 }
    366 
    367 function void
    368 stream_append_byte(Stream *s, u8 b)
    369 {
    370 	stream_append(s, &b, 1);
    371 }
    372 
    373 function void
    374 stream_pad(Stream *s, u8 b, i32 n)
    375 {
    376 	while (n > 0) stream_append_byte(s, b), n--;
    377 }
    378 
    379 function void
    380 stream_append_s8(Stream *s, s8 str)
    381 {
    382 	stream_append(s, str.data, str.len);
    383 }
    384 
    385 #define stream_append_s8s(s, ...) stream_append_s8s_(s, arg_list(s8, ##__VA_ARGS__))
    386 function void
    387 stream_append_s8s_(Stream *s, s8 *strs, iz count)
    388 {
    389 	for (iz i = 0; i < count; i++)
    390 		stream_append(s, strs[i].data, strs[i].len);
    391 }
    392 
    393 function void
    394 stream_append_u64_width(Stream *s, u64 n, u64 min_width)
    395 {
    396 	u8 tmp[64];
    397 	u8 *end = tmp + sizeof(tmp);
    398 	u8 *beg = end;
    399 	min_width = MIN(sizeof(tmp), min_width);
    400 
    401 	do { *--beg = (u8)('0' + (n % 10)); } while (n /= 10);
    402 	while (end - beg > 0 && (uz)(end - beg) < min_width)
    403 		*--beg = '0';
    404 
    405 	stream_append(s, beg, end - beg);
    406 }
    407 
    408 function void
    409 stream_append_u64(Stream *s, u64 n)
    410 {
    411 	stream_append_u64_width(s, n, 0);
    412 }
    413 
    414 function void
    415 stream_append_hex_u64_width(Stream *s, u64 n, iz width)
    416 {
    417 	assert(width <= 16);
    418 	if (!s->errors) {
    419 		u8  buf[16];
    420 		u8 *end = buf + sizeof(buf);
    421 		u8 *beg = end;
    422 		while (n) {
    423 			*--beg = (u8)"0123456789abcdef"[n & 0x0F];
    424 			n >>= 4;
    425 		}
    426 		while (end - beg < width)
    427 			*--beg = '0';
    428 		stream_append(s, beg, end - beg);
    429 	}
    430 }
    431 
    432 function void
    433 stream_append_hex_u64(Stream *s, u64 n)
    434 {
    435 	stream_append_hex_u64_width(s, n, 2);
    436 }
    437 
    438 function void
    439 stream_append_i64(Stream *s, i64 n)
    440 {
    441 	if (n < 0) {
    442 		stream_append_byte(s, '-');
    443 		n *= -1;
    444 	}
    445 	stream_append_u64(s, (u64)n);
    446 }
    447 
/* Append f in fixed-point decimal. prec is a power of ten selecting the
 * fractional digit count (e.g. 100 -> 2 digits). */
function void
stream_append_f64(Stream *s, f64 f, u64 prec)
{
	if (f < 0) {
		stream_append_byte(s, '-');
		f *= -1;
	}

	/* NOTE: round last digit */
	f += 0.5f / (f64)prec;

	/* magnitudes that cannot fit a u64 integral part print as "inf" */
	if (f >= (f64)(-1UL >> 1)) {
		stream_append_s8(s, s8("inf"));
	} else {
		u64 integral = (u64)f;
		u64 fraction = (u64)((f - (f64)integral) * (f64)prec);
		stream_append_u64(s, integral);
		stream_append_byte(s, '.');
		/* left-pad the fraction with zeros out to its digit count */
		for (u64 i = prec / 10; i > 1; i /= 10) {
			if (i > fraction)
				stream_append_byte(s, '0');
		}
		stream_append_u64(s, fraction);
	}
}
    473 
    474 function void
    475 stream_append_f64_e(Stream *s, f64 f)
    476 {
    477 	/* TODO: there should be a better way of doing this */
    478 	#if 0
    479 	/* NOTE: we ignore subnormal numbers for now */
    480 	union { f64 f; u64 u; } u = {.f = f};
    481 	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
    482 	f32 log_10_of_2 = 0.301f;
    483 	i32 scale       = (exponent * log_10_of_2);
    484 	/* NOTE: normalize f */
    485 	for (i32 i = ABS(scale); i > 0; i--)
    486 		f *= (scale > 0)? 0.1f : 10.0f;
    487 	#else
    488 	i32 scale = 0;
    489 	if (f != 0) {
    490 		while (f > 1) {
    491 			f *= 0.1f;
    492 			scale++;
    493 		}
    494 		while (f < 1) {
    495 			f *= 10.0f;
    496 			scale--;
    497 		}
    498 	}
    499 	#endif
    500 
    501 	u32 prec = 100;
    502 	stream_append_f64(s, f, prec);
    503 	stream_append_byte(s, 'e');
    504 	stream_append_byte(s, scale >= 0? '+' : '-');
    505 	for (u32 i = prec / 10; i > 1; i /= 10)
    506 		stream_append_byte(s, '0');
    507 	stream_append_u64(s, (u64)Abs(scale));
    508 }
    509 
    510 function void
    511 stream_append_v2(Stream *s, v2 v)
    512 {
    513 	stream_append_byte(s, '{');
    514 	stream_append_f64(s, v.x, 100);
    515 	stream_append_s8(s, s8(", "));
    516 	stream_append_f64(s, v.y, 100);
    517 	stream_append_byte(s, '}');
    518 }
    519 
    520 function Stream
    521 arena_stream(Arena a)
    522 {
    523 	Stream result = {0};
    524 	result.data   = a.beg;
    525 	result.cap    = (i32)(a.end - a.beg);
    526 
    527 	/* TODO(rnp): no idea what to do here if we want to maintain the ergonomics */
    528 	asan_unpoison_region(result.data, result.cap);
    529 
    530 	return result;
    531 }
    532 
/* Finalize a stream created by arena_stream(): commits exactly the bytes
 * written so far. The stream must still sit at the arena's front. */
function s8
arena_stream_commit(Arena *a, Stream *s)
{
	ASSERT(s->data == a->beg);
	s8 result = stream_to_s8(s);
	arena_commit(a, result.len);
	return result;
}
    541 
/* Like arena_stream_commit() but NUL terminates the committed bytes.
 * NOTE(review): when s->widx == s->cap with no prior stream error, the
 * terminator write is skipped yet stream_to_s8() still returns the full
 * buffer and len + 1 bytes are committed — confirm callers never hit
 * this edge. */
function s8
arena_stream_commit_zero(Arena *a, Stream *s)
{
	b32 error = s->errors || s->widx == s->cap;
	if (!error)
		s->data[s->widx] = 0;
	s8 result = stream_to_s8(s);
	arena_commit(a, result.len + 1);
	return result;
}
    552 
    553 function s8
    554 arena_stream_commit_and_reset(Arena *arena, Stream *s)
    555 {
    556 	s8 result = arena_stream_commit_zero(arena, s);
    557 	*s = arena_stream(*arena);
    558 	return result;
    559 }
    560 
    561 #if !defined(XXH_IMPLEMENTATION)
    562 # define XXH_INLINE_ALL
    563 # define XXH_IMPLEMENTATION
    564 # define XXH_STATIC_LINKING_ONLY
    565 # include "external/xxhash.h"
    566 #endif
    567 
    568 function u128
    569 u128_hash_from_data(void *data, uz size)
    570 {
    571 	u128 result = {0};
    572 	XXH128_hash_t hash = XXH3_128bits_withSeed(data, size, 4969);
    573 	mem_copy(&result, &hash, sizeof(result));
    574 	return result;
    575 }
    576 
    577 function u64
    578 u64_hash_from_s8(s8 v)
    579 {
    580 	u64 result = XXH3_64bits_withSeed(v.data, (uz)v.len, 4969);
    581 	return result;
    582 }
    583 
    584 function s8
    585 c_str_to_s8(char *cstr)
    586 {
    587 	s8 result = {.data = (u8 *)cstr};
    588 	if (cstr) { while (*cstr) { result.len++; cstr++; } }
    589 	return result;
    590 }
    591 
    592 function b32
    593 s8_equal(s8 a, s8 b)
    594 {
    595 	b32 result = a.len == b.len;
    596 	for (iz i = 0; result && i < a.len; i++)
    597 		result = a.data[i] == b.data[i];
    598 	return result;
    599 }
    600 
    601 /* NOTE(rnp): returns < 0 if byte is not found */
    602 function iz
    603 s8_scan_backwards(s8 s, u8 byte)
    604 {
    605 	iz result = (u8 *)memory_scan_backwards(s.data, byte, s.len) - s.data;
    606 	return result;
    607 }
    608 
    609 function s8
    610 s8_cut_head(s8 s, iz cut)
    611 {
    612 	s8 result = s;
    613 	if (cut > 0) {
    614 		result.data += cut;
    615 		result.len  -= cut;
    616 	}
    617 	return result;
    618 }
    619 
    620 function s8
    621 s8_alloc(Arena *a, iz len)
    622 {
    623 	s8 result = {.data = push_array(a, u8, len), .len = len};
    624 	return result;
    625 }
    626 
    627 function s8
    628 s16_to_s8(Arena *a, s16 in)
    629 {
    630 	s8 result = s8("");
    631 	if (in.len) {
    632 		iz commit = in.len * 4;
    633 		iz length = 0;
    634 		u8 *data = arena_commit(a, commit + 1);
    635 		u16 *beg = in.data;
    636 		u16 *end = in.data + in.len;
    637 		while (beg < end) {
    638 			UnicodeDecode decode = utf16_decode(beg, end - beg);
    639 			length += utf8_encode(data + length, decode.cp);
    640 			beg    += decode.consumed;
    641 		}
    642 		data[length] = 0;
    643 		result = (s8){.len = length, .data = data};
    644 		arena_pop(a, commit - length);
    645 	}
    646 	return result;
    647 }
    648 
    649 function s16
    650 s8_to_s16(Arena *a, s8 in)
    651 {
    652 	s16 result = {0};
    653 	if (in.len) {
    654 		iz required = 2 * in.len + 1;
    655 		u16 *data   = push_array(a, u16, required);
    656 		iz length   = 0;
    657 		/* TODO(rnp): utf8_decode */
    658 		for (iz i = 0; i < in.len; i++) {
    659 			u32 cp  = in.data[i];
    660 			length += utf16_encode(data + length, cp);
    661 		}
    662 		result = (s16){.len = length, .data = data};
    663 		arena_pop(a, required - length);
    664 	}
    665 	return result;
    666 }
    667 
    668 #define push_s8_from_parts(a, j, ...) push_s8_from_parts_((a), (j), arg_list(s8, __VA_ARGS__))
    669 function s8
    670 push_s8_from_parts_(Arena *arena, s8 joiner, s8 *parts, iz count)
    671 {
    672 	iz length = joiner.len * (count - 1);
    673 	for (iz i = 0; i < count; i++)
    674 		length += parts[i].len;
    675 
    676 	s8 result = {.len = length, .data = arena_commit(arena, length + 1)};
    677 
    678 	iz offset = 0;
    679 	for (iz i = 0; i < count; i++) {
    680 		if (i != 0) {
    681 			mem_copy(result.data + offset, joiner.data, (uz)joiner.len);
    682 			offset += joiner.len;
    683 		}
    684 		mem_copy(result.data + offset, parts[i].data, (uz)parts[i].len);
    685 		offset += parts[i].len;
    686 	}
    687 	result.data[result.len] = 0;
    688 
    689 	return result;
    690 }
    691 
    692 function s8
    693 push_s8(Arena *a, s8 str)
    694 {
    695 	s8 result   = s8_alloc(a, str.len + 1);
    696 	result.len -= 1;
    697 	mem_copy(result.data, str.data, (uz)result.len);
    698 	return result;
    699 }
    700 
    701 function force_inline u32
    702 round_down_power_of_2(u32 a)
    703 {
    704 	u32 result = 0x80000000UL >> clz_u32(a);
    705 	return result;
    706 }
    707 
    708 function force_inline u32
    709 round_up_power_of_2(u32 a)
    710 {
    711 	u32 result = 0x80000000UL >> (clz_u32(a - 1) - 1);
    712 	return result;
    713 }
    714 
    715 function force_inline iz
    716 round_up_to(iz value, iz multiple)
    717 {
    718 	iz result = value;
    719 	if (value % multiple != 0)
    720 		result += multiple - value % multiple;
    721 	return result;
    722 }
    723 
    724 function void
    725 split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right)
    726 {
    727 	if (left) {
    728 		left->pos    = rect.pos;
    729 		left->size.h = rect.size.h;
    730 		left->size.w = rect.size.w * fraction;
    731 	}
    732 	if (right) {
    733 		right->pos    = rect.pos;
    734 		right->pos.x += rect.size.w * fraction;
    735 		right->size.h = rect.size.h;
    736 		right->size.w = rect.size.w * (1.0f - fraction);
    737 	}
    738 }
    739 
    740 function void
    741 split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot)
    742 {
    743 	if (top) {
    744 		top->pos    = rect.pos;
    745 		top->size.w = rect.size.w;
    746 		top->size.h = rect.size.h * fraction;
    747 	}
    748 	if (bot) {
    749 		bot->pos    = rect.pos;
    750 		bot->pos.y += rect.size.h * fraction;
    751 		bot->size.w = rect.size.w;
    752 		bot->size.h = rect.size.h * (1.0f - fraction);
    753 	}
    754 }
    755 
    756 function void
    757 cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right)
    758 {
    759 	at = MIN(at, rect.size.w);
    760 	if (left) {
    761 		*left = rect;
    762 		left->size.w = at;
    763 	}
    764 	if (right) {
    765 		*right = rect;
    766 		right->pos.x  += at;
    767 		right->size.w -= at;
    768 	}
    769 }
    770 
    771 function void
    772 cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot)
    773 {
    774 	at = MIN(at, rect.size.h);
    775 	if (top) {
    776 		*top = rect;
    777 		top->size.h = at;
    778 	}
    779 	if (bot) {
    780 		*bot = rect;
    781 		bot->pos.y  += at;
    782 		bot->size.h -= at;
    783 	}
    784 }
    785 
    786 function NumberConversion
    787 integer_from_s8(s8 raw)
    788 {
    789 	read_only local_persist alignas(64) i8 lut[64] = {
    790 		 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    791 		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    792 		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    793 		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    794 	};
    795 
    796 	NumberConversion result = {.unparsed = raw};
    797 
    798 	iz  i     = 0;
    799 	i64 scale = 1;
    800 	if (raw.len > 0 && raw.data[0] == '-') {
    801 		scale = -1;
    802 		i     =  1;
    803 	}
    804 
    805 	b32 hex = 0;
    806 	if (raw.len - i > 2 && raw.data[i] == '0' && (raw.data[1] == 'x' || raw.data[1] == 'X')) {
    807 		hex = 1;
    808 		i += 2;
    809 	}
    810 
    811 	#define integer_conversion_body(radix, clamp) do {\
    812 		for (; i < raw.len; i++) {\
    813 			i64 value = lut[Min((u8)(raw.data[i] - (u8)'0'), clamp)];\
    814 			if (value >= 0) {\
    815 				if (result.U64 > (U64_MAX - (u64)value) / radix) {\
    816 					result.result = NumberConversionResult_OutOfRange;\
    817 					result.U64    = U64_MAX;\
    818 					return result;\
    819 				} else {\
    820 					result.U64 = radix * result.U64 + (u64)value;\
    821 				}\
    822 			} else {\
    823 				break;\
    824 			}\
    825 		}\
    826 	} while (0)
    827 
    828 	if (hex) integer_conversion_body(16u, 63u);
    829 	else     integer_conversion_body(10u, 15u);
    830 
    831 	#undef integer_conversion_body
    832 
    833 	result.unparsed = (s8){.len = raw.len - i, .data = raw.data + i};
    834 	result.result   = i > 0 ? NumberConversionResult_Success : NumberConversionResult_Invalid;
    835 	result.kind     = NumberConversionKind_Integer;
    836 	if (scale < 0) result.U64 = 0 - result.U64;
    837 
    838 	return result;
    839 }
    840 
    841 function NumberConversion
    842 number_from_s8(s8 s)
    843 {
    844 	NumberConversion result  = {.unparsed = s};
    845 	NumberConversion integer = integer_from_s8(s);
    846 	if (integer.result == NumberConversionResult_Success) {
    847 		if (integer.unparsed.len != 0 && integer.unparsed.data[0] == '.') {
    848 			s = integer.unparsed;
    849 			s.data++;
    850 			s.len--;
    851 
    852 			while (s.len > 0 && s.data[s.len - 1] == '0') s.len--;
    853 
    854 			NumberConversion fractional = integer_from_s8(s);
    855 			if (fractional.result == NumberConversionResult_Success || s.len == 0) {
    856 				result.F64 = (f64)fractional.U64;
    857 
    858 				u64 divisor = (u64)(fractional.unparsed.data - s.data);
    859 				while (divisor > 0) { result.F64 /= 10.0; divisor--; }
    860 
    861 				result.F64 += (f64)integer.S64;
    862 
    863 				result.result   = NumberConversionResult_Success;
    864 				result.kind     = NumberConversionKind_Float;
    865 				result.unparsed = fractional.unparsed;
    866 			}
    867 		} else {
    868 			result = integer;
    869 		}
    870 	}
    871 	return result;
    872 }