ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

util.c (23965B)


      1 /* See LICENSE for license details. */
/* NOTE: the allocation macros in this file build designated initializers whose
 * defaults may be overridden by caller supplied fields (see arena_alloc);
 * silence the compiler warning about overridden initializers. */
#if   COMPILER_CLANG
  #pragma GCC diagnostic ignored "-Winitializer-overrides"
#elif COMPILER_GCC
  #pragma GCC diagnostic ignored "-Woverride-init"
#endif
      7 
      8 #define zero_struct(s) memory_clear((s), 0, sizeof(*(s)))
      9 function void *
     10 memory_clear(void *restrict p_, u8 c, u64 size)
     11 {
     12 	u8 *p = p_;
     13 	while (size > 0) p[--size] = c;
     14 	return p;
     15 }
     16 
     17 function b32
     18 memory_equal(void *restrict left, void *restrict right, u64 n)
     19 {
     20 	u8 *a = left, *b = right;
     21 	b32 result = 1;
     22 	for (; result && n; n--)
     23 		result &= *a++ == *b++;
     24 	return result;
     25 }
     26 
     27 #define mem_copy memory_copy
     28 function void
     29 memory_copy(void *restrict dest, void *restrict src, u64 n)
     30 {
     31 	u8 *s = src, *d = dest;
     32 	#ifdef __AVX512BW__
     33 	{
     34 		for (; n >= 64; n -= 64, s += 64, d += 64)
     35 			_mm512_storeu_epi8(d, _mm512_loadu_epi8(s));
     36 		__mmask64 k = _cvtu64_mask64(_bzhi_u64(-1ULL, n));
     37 		_mm512_mask_storeu_epi8(d, k, _mm512_maskz_loadu_epi8(k, s));
     38 	}
     39 	#else
     40 		for (; n; n--) *d++ = *s++;
     41 	#endif
     42 }
     43 
/* IMPORTANT: this function may fault if dest, src, and n are not multiples of 64 */
/* Copies `n` bytes from `src` to `dest` using the streaming (non-temporal) load and
 * store instructions of the target when one of the vector paths below is compiled
 * in; otherwise it simply forwards to memory_copy(). */
function void
memory_copy_non_temporal(void *restrict dest, void *restrict src, u64 n)
{
	/* NOTE: contract stated to the compiler: both pointers and the byte count are
	 * multiples of 64 (what `assume` expands to is defined outside this file) */
	assume(((u64)dest & 63) == 0);
	assume(((u64)src  & 63) == 0);
	assume(((u64)n    & 63) == 0);
	u8 *s = src, *d = dest;

	#if defined(__AVX512BW__)
	{
		/* NOTE: one 64 byte streaming load + streaming store per iteration */
		for (; n >= 64; n -= 64, s += 64, d += 64)
			_mm512_stream_si512((__m512i *)d, _mm512_stream_load_si512((__m512i *)s));
	}
	#elif defined(__AVX2__)
	{
		/* NOTE: one 32 byte streaming load + streaming store per iteration */
		for (; n >= 32; n -= 32, s += 32, d += 32)
			_mm256_stream_si256((__m256i *)d, _mm256_stream_load_si256((__m256i *)s));
	}
	#elif ARCH_ARM64 && !COMPILER_MSVC
	{
		/* NOTE: skips the loop when n == 0; otherwise each iteration loads 32 bytes
		 * from s into q0/q1 (ldnp), stores them to d (stnp), advances both pointers
		 * by 32 and subtracts 32 from n, looping until n reaches exactly zero.
		 * n must therefore be a multiple of 32 or the loop will not terminate. */
		asm volatile (
			"cbz  %2, 2f\n"
			"1: ldnp q0, q1, [%1]\n"
			"subs %2, %2, #32\n"
			"add  %1, %1, #32\n"
			"stnp q0, q1, [%0]\n"
			"add  %0, %0, #32\n"
			"b.ne 1b\n"
			"2:"
			:  "+r"(d), "+r"(s), "+r"(n)
			:: "memory", "v0", "v1"
		);
	}
	#else
		/* NOTE: no streaming instructions selected; plain copy */
		memory_copy(d, s, n);
	#endif
}
     82 
     83 function void
     84 memory_move(u8 *dest, u8 *src, u64 n)
     85 {
     86 	if (dest < src) memory_copy(dest, src, n);
     87 	else            while (n) { n--; dest[n] = src[n]; }
     88 }
     89 
     90 function void *
     91 memory_scan_backwards(void *memory, u8 byte, iz n)
     92 {
     93 	void *result = 0;
     94 	u8   *s      = memory;
     95 	while (n > 0) if (s[--n] == byte) { result = s + n; break; }
     96 	return result;
     97 }
     98 
     99 function Arena
    100 arena_from_memory(void *memory, u64 size)
    101 {
    102 	Arena result;
    103 	result.beg = memory;
    104 	result.end = result.beg + size;
    105 	return result;
    106 }
    107 
    108 function void *
    109 align_pointer_up(void *p, uz alignment)
    110 {
    111 	uz padding = -(u64)p & (alignment - 1);
    112 	void *result = (u8 *)p + padding;
    113 	return result;
    114 }
    115 
    116 function void *
    117 arena_aligned_start(Arena a, uz alignment)
    118 {
    119 	return align_pointer_up(a.beg, alignment);
    120 }
    121 
    122 #define arena_capacity(a, t) arena_capacity_(a, sizeof(t), alignof(t))
    123 function iz
    124 arena_capacity_(Arena *a, iz size, uz alignment)
    125 {
    126 	iz available = a->end - (u8 *)arena_aligned_start(*a, alignment);
    127 	iz result    = available / size;
    128 	return result;
    129 }
    130 
    131 function u8 *
    132 arena_commit(Arena *a, iz size)
    133 {
    134 	assert(a->end - a->beg >= size);
    135 	u8 *result = a->beg;
    136 	a->beg += size;
    137 	return result;
    138 }
    139 
    140 function void
    141 arena_pop(Arena *a, iz length)
    142 {
    143 	a->beg -= length;
    144 }
    145 
/* Flags accepted by arena_alloc_() */
typedef enum {
	/* NOTE: when set arena_alloc_() does not clear the returned memory */
	ArenaAllocateFlags_NoZero = 1 << 0,
} ArenaAllocateFlags;

/* Description of one allocation request: `count` items of `size` bytes each,
 * placed at an address aligned to `align`. */
typedef struct {
	iz size;
	uz align;
	iz count;
	ArenaAllocateFlags flags;
} ArenaAllocateInfo;

/* NOTE: arena_alloc() defaults to align = 8 and count = 1; any field named in the
 * variadic arguments overrides the default. The push_* helpers allocate typed
 * arrays or single structs, zeroed unless the _no_zero variant is used. */
#define arena_alloc(a, ...)         arena_alloc_(a, (ArenaAllocateInfo){.align = 8, .count = 1, ##__VA_ARGS__})
#define push_array(a, t, n)         (t *)arena_alloc(a, .size = sizeof(t), .align = alignof(t), .count = n)
#define push_array_no_zero(a, t, n) (t *)arena_alloc(a, .size = sizeof(t), .align = alignof(t), .count = n, .flags = ArenaAllocateFlags_NoZero)
#define push_struct(a, t)           push_array(a, t, 1)
#define push_struct_no_zero(a, t)   push_array_no_zero(a, t, 1)
    162 
    163 function void *
    164 arena_alloc_(Arena *a, ArenaAllocateInfo info)
    165 {
    166 	void *result = 0;
    167 	if (a->beg) {
    168 		u8 *start = arena_aligned_start(*a, info.align);
    169 		iz available = a->end - start;
    170 		assert((available >= 0 && info.count <= available / info.size));
    171 		asan_unpoison_region(start, info.count * info.size);
    172 		a->beg = start + info.count * info.size;
    173 		result = start;
    174 		if ((info.flags & ArenaAllocateFlags_NoZero) == 0)
    175 			result = memory_clear(start, 0, info.count * info.size);
    176 	}
    177 	return result;
    178 }
    179 
    180 function Arena
    181 sub_arena(Arena *a, iz size, uz align)
    182 {
    183 	Arena result = {.beg = arena_alloc(a, .size = size, .align = align, .flags = ArenaAllocateFlags_NoZero)};
    184 	result.end   = result.beg + size;
    185 	return result;
    186 }
    187 
    188 function Arena
    189 sub_arena_end(Arena *a, iz len, uz align)
    190 {
    191 	Arena result;
    192 	result.beg = (u8 *)((u64)(a->end - len) & ~(align - 1)),
    193 	result.end = a->end,
    194 
    195 	a->end = result.beg;
    196 	assert(a->end >= a->beg);
    197 
    198 	return result;
    199 }
    200 
    201 function TempArena
    202 begin_temp_arena(Arena *a)
    203 {
    204 	TempArena result = {.arena = a, .original_arena = *a};
    205 	return result;
    206 }
    207 
    208 function void
    209 end_temp_arena(TempArena ta)
    210 {
    211 	Arena *a = ta.arena;
    212 	if (a) {
    213 		assert(a->beg >= ta.original_arena.beg);
    214 		*a = ta.original_arena;
    215 	}
    216 }
    217 
    218 
/* Capacity given to a dynamic array the first time it grows from 0 */
enum { DA_INITIAL_CAP = 16 };

/* Index of the element pointed to by `it` within dynamic array `s` */
#define da_index(it, s) ((it) - (s)->data)
/* Makes sure `s` can hold its current count plus `n` more items, allocating from
 * arena `a`; s->data may be replaced by a new pointer. */
#define da_reserve(a, s, n) \
  (s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \
                          _Alignof(typeof(*(s)->data)), sizeof(*(s)->data))

/* Appends `item_count` items copied from `items` to the end of `s` */
#define da_append_count(a, s, items, item_count) do { \
	da_reserve((a), (s), (item_count));                                             \
	mem_copy((s)->data + (s)->count, (items), sizeof(*(items)) * (uz)(item_count)); \
	(s)->count += (item_count);                                                     \
} while (0)

/* Appends one zeroed item to `s` (growing it first when count == capacity) and
 * evaluates to a pointer to that item. */
#define da_push(a, s) \
  ((typeof((s)->data))memory_clear((s)->count == (s)->capacity  \
    ? da_reserve(a, s, 1),      \
      (s)->data + (s)->count++  \
    : (s)->data + (s)->count++, 0, sizeof(*(s)->data)))
    237 
    238 function void *
    239 da_reserve_(Arena *a, void *data, da_count *capacity, da_count needed, u64 align, i64 size)
    240 {
    241 	da_count cap = *capacity;
    242 
    243 	/* NOTE(rnp): handle both 0 initialized DAs and DAs that need to be moved (they started
    244 	 * on the stack or someone allocated something in the middle of the arena during usage) */
    245 	if (!data || a->beg != (u8 *)data + cap * size) {
    246 		void *copy = arena_alloc(a, .size = size, .align = align, .count = cap);
    247 		if (data) mem_copy(copy, data, (uz)(cap * size));
    248 		data = copy;
    249 	}
    250 
    251 	if (!cap) cap = DA_INITIAL_CAP;
    252 	while (cap < needed) cap *= 2;
    253 	arena_alloc(a, .size = size, .align = align, .count = cap - *capacity);
    254 	*capacity = cap;
    255 	return data;
    256 }
    257 
    258 function u32
    259 utf8_encode(u8 *out, u32 cp)
    260 {
    261 	u32 result = 1;
    262 	if (cp <= 0x7F) {
    263 		out[0] = cp & 0x7F;
    264 	} else if (cp <= 0x7FF) {
    265 		result = 2;
    266 		out[0] = ((cp >>  6) & 0x1F) | 0xC0;
    267 		out[1] = ((cp >>  0) & 0x3F) | 0x80;
    268 	} else if (cp <= 0xFFFF) {
    269 		result = 3;
    270 		out[0] = ((cp >> 12) & 0x0F) | 0xE0;
    271 		out[1] = ((cp >>  6) & 0x3F) | 0x80;
    272 		out[2] = ((cp >>  0) & 0x3F) | 0x80;
    273 	} else if (cp <= 0x10FFFF) {
    274 		result = 4;
    275 		out[0] = ((cp >> 18) & 0x07) | 0xF0;
    276 		out[1] = ((cp >> 12) & 0x3F) | 0x80;
    277 		out[2] = ((cp >>  6) & 0x3F) | 0x80;
    278 		out[3] = ((cp >>  0) & 0x3F) | 0x80;
    279 	} else {
    280 		out[0] = '?';
    281 	}
    282 	return result;
    283 }
    284 
    285 function UnicodeDecode
    286 utf16_decode(u16 *data, iz length)
    287 {
    288 	UnicodeDecode result = {.cp = U32_MAX};
    289 	if (length) {
    290 		result.consumed = 1;
    291 		result.cp = data[0];
    292 		if (length > 1 && BETWEEN(data[0], 0xD800u, 0xDBFFu)
    293 		               && BETWEEN(data[1], 0xDC00u, 0xDFFFu))
    294 		{
    295 			result.consumed = 2;
    296 			result.cp = ((data[0] - 0xD800u) << 10u) | ((data[1] - 0xDC00u) + 0x10000u);
    297 		}
    298 	}
    299 	return result;
    300 }
    301 
    302 function u32
    303 utf16_encode(u16 *out, u32 cp)
    304 {
    305 	u32 result = 1;
    306 	if (cp == U32_MAX) {
    307 		out[0] = '?';
    308 	} else if (cp < 0x10000u) {
    309 		out[0] = (u16)cp;
    310 	} else {
    311 		u32 value = cp - 0x10000u;
    312 		out[0] = (u16)(0xD800u + (value >> 10u));
    313 		out[1] = (u16)(0xDC00u + (value & 0x3FFu));
    314 		result = 2;
    315 	}
    316 	return result;
    317 }
    318 
    319 function Stream
    320 stream_from_buffer(u8 *buffer, u32 capacity)
    321 {
    322 	Stream result = {.data = buffer, .cap = (i32)capacity};
    323 	return result;
    324 }
    325 
    326 function Stream
    327 stream_alloc(Arena *a, i32 cap)
    328 {
    329 	Stream result = stream_from_buffer(arena_commit(a, cap), (u32)cap);
    330 	return result;
    331 }
    332 
    333 #define stream_to_s8(s) s8_from_str8(stream_to_str8(s))
    334 function str8
    335 stream_to_str8(Stream *s)
    336 {
    337 	str8 result = str8("");
    338 	if (!s->errors) result = (str8){.length = s->widx, .data = s->data};
    339 	return result;
    340 }
    341 
    342 function void
    343 stream_reset(Stream *s, i32 index)
    344 {
    345 	s->errors = s->cap <= index;
    346 	if (!s->errors)
    347 		s->widx = index;
    348 }
    349 
    350 function void
    351 stream_commit(Stream *s, i32 count)
    352 {
    353 	s->errors |= !BETWEEN(s->widx + count, 0, s->cap);
    354 	if (!s->errors)
    355 		s->widx += count;
    356 }
    357 
    358 function void
    359 stream_append(Stream *s, void *data, iz count)
    360 {
    361 	s->errors |= (s->cap - s->widx) < count;
    362 	if (!s->errors) {
    363 		memory_copy(s->data + s->widx, data, (uz)count);
    364 		s->widx += (i32)count;
    365 	}
    366 }
    367 
    368 function void
    369 stream_append_codepoint(Stream *s, u32 codepoint)
    370 {
    371 	u8 buffer[4];
    372 	stream_append(s, buffer, utf8_encode(buffer, codepoint));
    373 }
    374 
    375 // TODO(rnp): replace with handwritten version
    376 #include <stdarg.h>
    377 #include <stdio.h>
    378 function void
    379 stream_appendfv(Stream *s, const char *format, va_list args)
    380 {
    381 	i32 written = vsnprintf((char *)s->data + s->widx, s->cap - s->widx, format, args);
    382 	s->errors |= written > (s->cap - s->widx);
    383 	if (!s->errors) s->widx += written;
    384 }
    385 
    386 function print_format(2, 3) void
    387 stream_appendf(Stream *s, const char *format, ...)
    388 {
    389 	va_list args;
    390 	va_start(args, format);
    391 	stream_appendfv(s, format, args);
    392 	va_end(args);
    393 }
    394 
    395 function void
    396 stream_append_byte(Stream *s, u8 b)
    397 {
    398 	stream_append(s, &b, 1);
    399 }
    400 
    401 function void
    402 stream_pad(Stream *s, u8 b, i32 n)
    403 {
    404 	while (n > 0) stream_append_byte(s, b), n--;
    405 }
    406 
    407 function void
    408 stream_append_s8(Stream *s, s8 str)
    409 {
    410 	stream_append(s, str.data, str.len);
    411 }
    412 
    413 #define stream_append_s8s(s, ...) stream_append_s8s_(s, arg_list(s8, ##__VA_ARGS__))
    414 function void
    415 stream_append_s8s_(Stream *s, s8 *strs, iz count)
    416 {
    417 	for (iz i = 0; i < count; i++)
    418 		stream_append(s, strs[i].data, strs[i].len);
    419 }
    420 
    421 function void
    422 stream_append_u64_width(Stream *s, u64 n, u64 min_width)
    423 {
    424 	u8 tmp[64];
    425 	u8 *end = tmp + sizeof(tmp);
    426 	u8 *beg = end;
    427 	min_width = MIN(sizeof(tmp), min_width);
    428 
    429 	do { *--beg = (u8)('0' + (n % 10)); } while (n /= 10);
    430 	while (end - beg > 0 && (uz)(end - beg) < min_width)
    431 		*--beg = '0';
    432 
    433 	stream_append(s, beg, end - beg);
    434 }
    435 
    436 function void
    437 stream_append_u64(Stream *s, u64 n)
    438 {
    439 	stream_append_u64_width(s, n, 0);
    440 }
    441 
    442 function void
    443 stream_append_hex_u64_width(Stream *s, u64 n, iz width)
    444 {
    445 	assert(width <= 16);
    446 	if (!s->errors) {
    447 		u8  buf[16];
    448 		u8 *end = buf + sizeof(buf);
    449 		u8 *beg = end;
    450 		while (n) {
    451 			*--beg = (u8)"0123456789abcdef"[n & 0x0F];
    452 			n >>= 4;
    453 		}
    454 		while (end - beg < width)
    455 			*--beg = '0';
    456 		stream_append(s, beg, end - beg);
    457 	}
    458 }
    459 
    460 function void
    461 stream_append_hex_u64(Stream *s, u64 n)
    462 {
    463 	stream_append_hex_u64_width(s, n, 2);
    464 }
    465 
    466 function void
    467 stream_append_i64(Stream *s, i64 n)
    468 {
    469 	if (n < 0) {
    470 		stream_append_byte(s, '-');
    471 		n *= -1;
    472 	}
    473 	stream_append_u64(s, (u64)n);
    474 }
    475 
    476 function void
    477 stream_append_f64(Stream *s, f64 f, u64 prec)
    478 {
    479 	if (f < 0) {
    480 		stream_append_byte(s, '-');
    481 		f *= -1;
    482 	}
    483 
    484 	/* NOTE: round last digit */
    485 	f += 0.5f / (f64)prec;
    486 
    487 	if (f >= (f64)(-1UL >> 1)) {
    488 		stream_append_s8(s, s8("inf"));
    489 	} else {
    490 		u64 integral = (u64)f;
    491 		u64 fraction = (u64)((f - (f64)integral) * (f64)prec);
    492 		stream_append_u64(s, integral);
    493 		stream_append_byte(s, '.');
    494 		for (u64 i = prec / 10; i > 1; i /= 10) {
    495 			if (i > fraction)
    496 				stream_append_byte(s, '0');
    497 		}
    498 		stream_append_u64(s, fraction);
    499 	}
    500 }
    501 
    502 function void
    503 stream_append_f64_e(Stream *s, f64 f)
    504 {
    505 	/* TODO: there should be a better way of doing this */
    506 	#if 0
    507 	/* NOTE: we ignore subnormal numbers for now */
    508 	union { f64 f; u64 u; } u = {.f = f};
    509 	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
    510 	f32 log_10_of_2 = 0.301f;
    511 	i32 scale       = (exponent * log_10_of_2);
    512 	/* NOTE: normalize f */
    513 	for (i32 i = ABS(scale); i > 0; i--)
    514 		f *= (scale > 0)? 0.1f : 10.0f;
    515 	#else
    516 	i32 scale = 0;
    517 	if (f != 0) {
    518 		while (f > 1) {
    519 			f *= 0.1f;
    520 			scale++;
    521 		}
    522 		while (f < 1) {
    523 			f *= 10.0f;
    524 			scale--;
    525 		}
    526 	}
    527 	#endif
    528 
    529 	u32 prec = 100;
    530 	stream_append_f64(s, f, prec);
    531 	stream_append_byte(s, 'e');
    532 	stream_append_byte(s, scale >= 0? '+' : '-');
    533 	for (u32 i = prec / 10; i > 1; i /= 10)
    534 		stream_append_byte(s, '0');
    535 	stream_append_u64(s, (u64)Abs(scale));
    536 }
    537 
    538 function void
    539 stream_append_v2(Stream *s, v2 v)
    540 {
    541 	stream_append_byte(s, '{');
    542 	stream_append_f64(s, v.x, 100);
    543 	stream_append_s8(s, s8(", "));
    544 	stream_append_f64(s, v.y, 100);
    545 	stream_append_byte(s, '}');
    546 }
    547 
    548 function Stream
    549 arena_stream(Arena a)
    550 {
    551 	Stream result = {0};
    552 	result.data   = a.beg;
    553 	result.cap    = (i32)(a.end - a.beg);
    554 
    555 	/* TODO(rnp): no idea what to do here if we want to maintain the ergonomics */
    556 	asan_unpoison_region(result.data, result.cap);
    557 
    558 	return result;
    559 }
    560 
    561 function s8
    562 arena_stream_commit(Arena *a, Stream *s)
    563 {
    564 	assert(s->data == a->beg);
    565 	s8 result = stream_to_s8(s);
    566 	arena_commit(a, result.len);
    567 	return result;
    568 }
    569 
    570 function s8
    571 arena_stream_commit_zero(Arena *a, Stream *s)
    572 {
    573 	b32 error = s->errors || s->widx == s->cap;
    574 	if (!error)
    575 		s->data[s->widx] = 0;
    576 	s8 result = stream_to_s8(s);
    577 	arena_commit(a, result.len + 1);
    578 	return result;
    579 }
    580 
    581 function s8
    582 arena_stream_commit_and_reset(Arena *arena, Stream *s)
    583 {
    584 	s8 result = arena_stream_commit_zero(arena, s);
    585 	*s = arena_stream(*arena);
    586 	return result;
    587 }
    588 
    589 #if !defined(XXH_IMPLEMENTATION)
    590 # define XXH_INLINE_ALL
    591 # define XXH_IMPLEMENTATION
    592 # define XXH_STATIC_LINKING_ONLY
    593 # include "external/xxhash.h"
    594 #endif
    595 
    596 function u128
    597 u128_hash_from_data(void *data, uz size)
    598 {
    599 	u128 result = {0};
    600 	XXH128_hash_t hash = XXH3_128bits_withSeed(data, size, 4969);
    601 	mem_copy(&result, &hash, sizeof(result));
    602 	return result;
    603 }
    604 
    605 function u64
    606 u64_hash_from_str8_seed(str8 string, u64 seed)
    607 {
    608 	u64 result = XXH3_64bits_withSeed(string.data, (uz)string.length, seed);
    609 	return result;
    610 }
    611 
    612 function u64
    613 u64_hash_from_str8(str8 v)
    614 {
    615 	u64 result = u64_hash_from_str8_seed(v, 4969);
    616 	return result;
    617 }
    618 
    619 function str8
    620 str8_from_c_str(char *cstr)
    621 {
    622 	str8 result = {.data = (u8 *)cstr};
    623 	if (cstr) while (*cstr) cstr++;
    624 	result.length = (u8 *)cstr - result.data;
    625 	return result;
    626 }
    627 
    628 function s8
    629 c_str_to_s8(char *cstr)
    630 {
    631 	str8 s = str8_from_c_str(cstr);
    632 	s8 result = s8_from_str8(s);
    633 	return result;
    634 }
    635 
    636 function str8
    637 str8_range(u8 *start, u8 *one_past_last)
    638 {
    639 	str8 result;
    640 	result.data   = start;
    641 	result.length = one_past_last - start;
    642 	return result;
    643 }
    644 
    645 function str8
    646 str8_skip(str8 s, i64 count)
    647 {
    648 	str8 result = s;
    649 	if (count > 0) {
    650 		result.data   += count;
    651 		result.length -= count;
    652 	}
    653 	return result;
    654 }
    655 
    656 function b32
    657 str8_equal(str8 a, str8 b)
    658 {
    659 	b32 result = a.length == b.length;
    660 	for (i64 i = 0; result && i < a.length; i++)
    661 		result = a.data[i] == b.data[i];
    662 	return result;
    663 }
    664 
    665 function b32
    666 s8_equal(s8 a, s8 b)
    667 {
    668 	return str8_equal(str8_from_s8(a), str8_from_s8(b));
    669 }
    670 
    671 /* NOTE(rnp): returns < 0 if byte is not found */
    672 function i64
    673 str8_scan_backwards(str8 s, u8 byte)
    674 {
    675 	i64 result = (u8 *)memory_scan_backwards(s.data, byte, s.length) - s.data;
    676 	return result;
    677 }
    678 
    679 function str8
    680 str8_cut_head(str8 s, i64 cut)
    681 {
    682 	str8 result = s;
    683 	if (cut > 0) {
    684 		result.data   += cut;
    685 		result.length -= cut;
    686 	}
    687 	result.length = Max(0, result.length);
    688 	return result;
    689 }
    690 
    691 function b32
    692 str8_match(str8 a, str8 b, StringMatchFlags flags)
    693 {
    694 	b32 result = 0;
    695 	if (flags == 0) {
    696 		result = str8_equal(a, b);
    697 	} else if (a.length == b.length || (flags & StringMatchFlag_SloppySize)) {
    698 		result = 1;
    699 		i64 length = Min(a.length, b.length);
    700 		for (i64 it = 0; it < length && result; it++) {
    701 			u8 ab = a.data[it], bb = b.data[it];
    702 			if (flags & StringMatchFlag_CaseInsensitive) {
    703 				ab |= 0x20;
    704 				bb |= 0x20;
    705 			}
    706 			result &= ab == bb;
    707 		}
    708 	}
    709 	return result;
    710 }
    711 
    712 function i64
    713 str8_find_needle(str8 string, str8 needle, StringMatchFlags flags)
    714 {
    715 	u8 *s  = string.data;
    716 	u8 *se = string.data + Max(string.length + 1, needle.length) - needle.length;
    717 	if (needle.length > 0) {
    718 		flags |= StringMatchFlag_SloppySize;
    719 
    720 		u8 nb = needle.data[0];
    721 		if (flags & StringMatchFlag_CaseInsensitive)
    722 			nb |= 0x20;
    723 
    724 		str8 needle_tail = str8_skip(needle, 1);
    725 		u8 *s_opl = string.data + string.length;
    726 		for (; s < se; s++) {
    727 			u8 sb = *s;
    728 			if (flags & StringMatchFlag_CaseInsensitive)
    729 				sb |= 0x20;
    730 
    731 			if (sb == nb && str8_match(str8_range(s + 1, s_opl), needle_tail, flags))
    732 				break;
    733 		}
    734 	}
    735 
    736 	i64 result = string.length;
    737 	if (s < se)
    738 		result = s - string.data;
    739 	return result;
    740 }
    741 
    742 
    743 function str8
    744 str8_alloc(Arena *a, i64 length)
    745 {
    746 	str8 result = {.data = push_array(a, u8, length), .length = length};
    747 	return result;
    748 }
    749 
    750 function str8
    751 str8_from_str16(Arena *a, str16 in)
    752 {
    753 	str8 result = str8("");
    754 	if (in.length) {
    755 		i64 commit = in.length * 4;
    756 		i64 length = 0;
    757 		u8 *data = arena_commit(a, commit + 1);
    758 		u16 *beg = in.data;
    759 		u16 *end = in.data + in.length;
    760 		while (beg < end) {
    761 			UnicodeDecode decode = utf16_decode(beg, end - beg);
    762 			length += utf8_encode(data + length, decode.cp);
    763 			beg    += decode.consumed;
    764 		}
    765 		data[length] = 0;
    766 		result = (str8){.length = length, .data = data};
    767 		arena_pop(a, commit - length);
    768 	}
    769 	return result;
    770 }
    771 
    772 function str16
    773 str16_from_str8(Arena *a, str8 in)
    774 {
    775 	str16 result = {0};
    776 	if (in.length) {
    777 		i64  length   = 0;
    778 		i64  required = 2 * in.length + 1;
    779 		u16 *data     = push_array(a, u16, required);
    780 		/* TODO(rnp): utf8_decode */
    781 		for (i64 i = 0; i < in.length; i++) {
    782 			u32 cp  = in.data[i];
    783 			length += utf16_encode(data + length, cp);
    784 		}
    785 		result = (str16){.length = length, .data = data};
    786 		arena_pop(a, required - length);
    787 	}
    788 	return result;
    789 }
    790 
    791 #define push_str8_from_parts(a, j, ...) push_str8_from_parts_((a), (j), arg_list(str8, __VA_ARGS__))
    792 function str8
    793 push_str8_from_parts_(Arena *arena, str8 joiner, str8 *parts, i64 count)
    794 {
    795 	i64 length = joiner.length * (count - 1);
    796 	for (i64 i = 0; i < count; i++)
    797 		length += parts[i].length;
    798 
    799 	str8 result = {.length = length, .data = arena_commit(arena, length + 1)};
    800 
    801 	i64 offset = 0;
    802 	for (i64 i = 0; i < count; i++) {
    803 		if (i != 0) {
    804 			memory_copy(result.data + offset, joiner.data, (uz)joiner.length);
    805 			offset += joiner.length;
    806 		}
    807 		memory_copy(result.data + offset, parts[i].data, (uz)parts[i].length);
    808 		offset += parts[i].length;
    809 	}
    810 	result.data[result.length] = 0;
    811 
    812 	return result;
    813 }
    814 
    815 #define push_s8_from_parts(a, j, ...) push_s8_from_parts_((a), (j), arg_list(s8, __VA_ARGS__))
    816 function s8
    817 push_s8_from_parts_(Arena *arena, s8 joiner, s8 *parts, iz count)
    818 {
    819 	iz length = joiner.len * (count - 1);
    820 	for (iz i = 0; i < count; i++)
    821 		length += parts[i].len;
    822 
    823 	s8 result = {.len = length, .data = arena_commit(arena, length + 1)};
    824 
    825 	iz offset = 0;
    826 	for (iz i = 0; i < count; i++) {
    827 		if (i != 0) {
    828 			mem_copy(result.data + offset, joiner.data, (uz)joiner.len);
    829 			offset += joiner.len;
    830 		}
    831 		mem_copy(result.data + offset, parts[i].data, (uz)parts[i].len);
    832 		offset += parts[i].len;
    833 	}
    834 	result.data[result.len] = 0;
    835 
    836 	return result;
    837 }
    838 
    839 function str8
    840 push_str8(Arena *a, str8 str)
    841 {
    842 	str8 result    = str8_alloc(a, str.length + 1);
    843 	result.length -= 1;
    844 	memory_copy(result.data, str.data, (uz)result.length);
    845 	return result;
    846 }
    847 
    848 function s8
    849 push_s8(Arena *a, s8 str)
    850 {
    851 	str8 copy   = push_str8(a, str8_from_s8(str));
    852 	s8   result = s8_from_str8(copy);
    853 	return result;
    854 }
    855 
    856 // TODO(rnp): replace with handwritten version
    857 function str8
    858 push_str8_fv(Arena *arena, const char *format, va_list args)
    859 {
    860 	Stream sb = arena_stream(*arena);
    861 	stream_appendfv(&sb, format, args);
    862 	s8 s = arena_stream_commit(arena, &sb);
    863 	str8 result = {.length = s.len, .data = s.data};
    864 	return result;
    865 }
    866 
    867 function str8
    868 push_f64_string(Arena *arena, f64 value, u64 precision)
    869 {
    870 	Stream sb = arena_stream(*arena);
    871 	stream_append_f64(&sb, value, precision);
    872 	s8 s = arena_stream_commit(arena, &sb);
    873 	str8 result = {.length = s.len, .data = s.data};
    874 	return result;
    875 }
    876 
    877 /* NOTE(rnp): from Hacker's Delight */
    878 function force_inline u64
    879 round_down_power_of_two(u64 a)
    880 {
    881 	u64 result = 0x8000000000000000ULL >> clz_u64(a);
    882 	return result;
    883 }
    884 
    885 function force_inline u64
    886 round_up_power_of_two(u64 a)
    887 {
    888 	u64 result = 0x8000000000000000ULL >> (clz_u64(a - 1) - 1);
    889 	return result;
    890 }
    891 
    892 function force_inline iz
    893 round_up_to(iz value, iz multiple)
    894 {
    895 	iz result = value;
    896 	if (value % multiple != 0)
    897 		result += multiple - value % multiple;
    898 	return result;
    899 }
    900 
    901 function void
    902 split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right)
    903 {
    904 	if (left) {
    905 		left->pos    = rect.pos;
    906 		left->size.h = rect.size.h;
    907 		left->size.w = rect.size.w * fraction;
    908 	}
    909 	if (right) {
    910 		right->pos    = rect.pos;
    911 		right->pos.x += rect.size.w * fraction;
    912 		right->size.h = rect.size.h;
    913 		right->size.w = rect.size.w * (1.0f - fraction);
    914 	}
    915 }
    916 
    917 function void
    918 split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot)
    919 {
    920 	if (top) {
    921 		top->pos    = rect.pos;
    922 		top->size.w = rect.size.w;
    923 		top->size.h = rect.size.h * fraction;
    924 	}
    925 	if (bot) {
    926 		bot->pos    = rect.pos;
    927 		bot->pos.y += rect.size.h * fraction;
    928 		bot->size.w = rect.size.w;
    929 		bot->size.h = rect.size.h * (1.0f - fraction);
    930 	}
    931 }
    932 
    933 function void
    934 cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right)
    935 {
    936 	at = MIN(at, rect.size.w);
    937 	if (left) {
    938 		*left = rect;
    939 		left->size.w = at;
    940 	}
    941 	if (right) {
    942 		*right = rect;
    943 		right->pos.x  += at;
    944 		right->size.w -= at;
    945 	}
    946 }
    947 
    948 function void
    949 cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot)
    950 {
    951 	at = MIN(at, rect.size.h);
    952 	if (top) {
    953 		*top = rect;
    954 		top->size.h = at;
    955 	}
    956 	if (bot) {
    957 		*bot = rect;
    958 		bot->pos.y  += at;
    959 		bot->size.h -= at;
    960 	}
    961 }
    962 
    963 function NumberConversion
    964 integer_from_str8(str8 raw)
    965 {
    966 	read_only local_persist alignas(64) i8 lut[64] = {
    967 		 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    968 		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    969 		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    970 		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    971 	};
    972 
    973 	NumberConversion result = {.unparsed = raw};
    974 
    975 	i64 i     = 0;
    976 	i64 scale = 1;
    977 	if (raw.length > 0 && raw.data[0] == '-') {
    978 		scale = -1;
    979 		i     =  1;
    980 	}
    981 
    982 	b32 hex = 0;
    983 	if (raw.length - i > 2 && raw.data[i] == '0' && (raw.data[1] == 'x' || raw.data[1] == 'X')) {
    984 		hex = 1;
    985 		i += 2;
    986 	}
    987 
    988 	#define integer_conversion_body(radix, clamp) do {\
    989 		for (; i < raw.length; i++) {\
    990 			i64 value = lut[Min((u8)(raw.data[i] - (u8)'0'), clamp)];\
    991 			if (value >= 0) {\
    992 				if (result.U64 > (U64_MAX - (u64)value) / radix) {\
    993 					result.result = NumberConversionResult_OutOfRange;\
    994 					result.U64    = U64_MAX;\
    995 					return result;\
    996 				} else {\
    997 					result.U64 = radix * result.U64 + (u64)value;\
    998 				}\
    999 			} else {\
   1000 				break;\
   1001 			}\
   1002 		}\
   1003 	} while (0)
   1004 
   1005 	if (hex) integer_conversion_body(16u, 63u);
   1006 	else     integer_conversion_body(10u, 15u);
   1007 
   1008 	#undef integer_conversion_body
   1009 
   1010 	result.unparsed = (str8){.length = raw.length - i, .data = raw.data + i};
   1011 	result.result   = i > 0 ? NumberConversionResult_Success : NumberConversionResult_Invalid;
   1012 	result.kind     = NumberConversionKind_Integer;
   1013 	if (scale < 0) result.U64 = 0 - result.U64;
   1014 
   1015 	return result;
   1016 }
   1017 
   1018 function NumberConversion
   1019 number_from_str8(str8 s)
   1020 {
   1021 	NumberConversion result  = {.unparsed = s};
   1022 	NumberConversion integer = integer_from_str8(s);
   1023 	if (integer.result == NumberConversionResult_Success) {
   1024 		if (integer.unparsed.length != 0 && integer.unparsed.data[0] == '.') {
   1025 			s = integer.unparsed;
   1026 			s.data++;
   1027 			s.length--;
   1028 
   1029 			while (s.length > 0 && s.data[s.length - 1] == '0') s.length--;
   1030 
   1031 			NumberConversion fractional = integer_from_str8(s);
   1032 			if (fractional.result == NumberConversionResult_Success || s.length == 0) {
   1033 				result.F64 = (f64)fractional.U64;
   1034 
   1035 				u64 divisor = (u64)(fractional.unparsed.data - s.data);
   1036 				while (divisor > 0) { result.F64 /= 10.0; divisor--; }
   1037 
   1038 				result.F64 += (f64)integer.S64;
   1039 
   1040 				result.result   = NumberConversionResult_Success;
   1041 				result.kind     = NumberConversionKind_Float;
   1042 				result.unparsed = fractional.unparsed;
   1043 			}
   1044 		} else {
   1045 			result = integer;
   1046 		}
   1047 	}
   1048 	return result;
   1049 }