util.c (16091B)
1 /* See LICENSE for license details. */ 2 static i32 hadamard_12_12_transpose[] = { 3 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, 5 1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 6 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, 7 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 8 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 9 1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 10 1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, 11 1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, 12 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, 13 1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 14 1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, 15 }; 16 17 #define zero_struct(s) mem_clear(s, 0, sizeof(*s)) 18 static void * 19 mem_clear(void *p_, u8 c, iz size) 20 { 21 u8 *p = p_; 22 while (size > 0) p[--size] = c; 23 return p; 24 } 25 26 static void 27 mem_copy(void *restrict dest, void *restrict src, uz n) 28 { 29 u8 *s = src, *d = dest; 30 for (; n; n--) *d++ = *s++; 31 } 32 33 static void 34 mem_move(u8 *dest, u8 *src, iz n) 35 { 36 if (dest < src) mem_copy(dest, src, n); 37 else while (n) { n--; dest[n] = src[n]; } 38 } 39 40 41 static u8 * 42 arena_commit(Arena *a, iz size) 43 { 44 ASSERT(a->end - a->beg >= size); 45 u8 *result = a->beg; 46 a->beg += size; 47 return result; 48 } 49 50 static void 51 arena_pop(Arena *a, iz length) 52 { 53 a->beg -= length; 54 } 55 56 #define alloc(a, t, n) (t *)alloc_(a, sizeof(t), _Alignof(t), n) 57 #define push_struct(a, t) (t *)alloc_(a, sizeof(t), _Alignof(t), 1) 58 static void * 59 alloc_(Arena *a, iz len, iz align, iz count) 60 { 61 /* NOTE: special case 0 arena */ 62 if (a->beg == 0) 63 return 0; 64 65 iz padding = -(uintptr_t)a->beg & (align - 1); 66 iz available = a->end - a->beg - padding; 67 if (available < 0 || count > available / len) 68 ASSERT(0 && "arena OOM\n"); 69 void *p = a->beg + padding; 70 a->beg += padding + count * len; 71 /* TODO: Performance? */ 72 return mem_clear(p, 0, count * len); 73 } 74 75 enum { DA_INITIAL_CAP = 8 }; 76 #define da_reserve(a, s, n) \ 77 (s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \ 78 _Alignof(typeof(*(s)->data)), sizeof(*(s)->data)) 79 80 #define da_push(a, s) \ 81 ((s)->count == (s)->capacity \ 82 ? da_reserve(a, s, 1), \ 83 (s)->data + (s)->count++ \ 84 : (s)->data + (s)->count++) 85 86 static void * 87 da_reserve_(Arena *a, void *data, iz *capacity, iz needed, iz align, iz size) 88 { 89 iz cap = *capacity; 90 91 /* NOTE(rnp): handle both 0 initialized DAs and DAs that need to be moved (they started 92 * on the stack or someone allocated something in the middle of the arena during usage) */ 93 if (!data || a->beg != (u8 *)data + cap * size) { 94 void *copy = alloc_(a, size, align, cap); 95 if (data) mem_copy(copy, data, cap * size); 96 data = copy; 97 } 98 99 if (!cap) cap = DA_INITIAL_CAP; 100 while (cap < needed) cap *= 2; 101 alloc_(a, size, align, cap - *capacity); 102 *capacity = cap; 103 return data; 104 } 105 106 static Arena 107 sub_arena(Arena *a, iz len, iz align) 108 { 109 Arena result = {0}; 110 111 iz padding = -(uintptr_t)a->beg & (align - 1); 112 result.beg = a->beg + padding; 113 result.end = result.beg + len; 114 arena_commit(a, len + padding); 115 116 return result; 117 } 118 119 static TempArena 120 begin_temp_arena(Arena *a) 121 { 122 TempArena result = {.arena = a, .old_beg = a->beg}; 123 return result; 124 } 125 126 static void 127 end_temp_arena(TempArena ta) 128 { 129 Arena *a = ta.arena; 130 if (a) { 131 ASSERT(a->beg >= ta.old_beg) 132 a->beg = ta.old_beg; 133 } 134 } 135 136 static u32 137 utf8_encode(u8 *out, u32 cp) 138 { 139 u32 result = 1; 140 if (cp <= 0x7F) { 141 out[0] = cp & 0x7F; 142 } else if (cp <= 0x7FF) { 143 result = 2; 144 out[0] = ((cp >> 6) & 0x1F) | 0xC0; 145 out[1] = ((cp >> 0) & 0x3F) | 0x80; 146 } else if (cp <= 0xFFFF) { 147 result = 3; 148 out[0] = ((cp >> 12) & 0x0F) | 0xE0; 149 out[1] = ((cp >> 6) & 0x3F) | 0x80; 150 out[2] = ((cp >> 0) & 0x3F) | 0x80; 151 } else if (cp <= 0x10FFFF) { 152 result = 4; 153 out[0] = ((cp >> 18) & 0x07) | 0xF0; 154 out[1] = ((cp >> 12) & 0x3F) | 0x80; 155 out[2] = ((cp >> 6) & 0x3F) | 0x80; 156 out[3] = ((cp >> 0) & 0x3F) | 0x80; 157 } else { 158 out[0] = '?'; 159 } 160 return result; 161 } 162 163 static UnicodeDecode 164 utf16_decode(u16 *data, iz length) 165 { 166 UnicodeDecode result = {.cp = U32_MAX}; 167 if (length) { 168 result.consumed = 1; 169 result.cp = data[0]; 170 if (length > 1 && BETWEEN(data[0], 0xD800, 0xDBFF) 171 && BETWEEN(data[1], 0xDC00, 0xDFFF)) 172 { 173 result.consumed = 2; 174 result.cp = ((data[0] - 0xD800) << 10) | ((data[1] - 0xDC00) + 0x10000); 175 } 176 } 177 return result; 178 } 179 180 static u32 181 utf16_encode(u16 *out, u32 cp) 182 { 183 u32 result = 1; 184 if (cp == U32_MAX) { 185 out[0] = '?'; 186 } else if (cp < 0x10000) { 187 out[0] = cp; 188 } else { 189 u32 value = cp - 0x10000; 190 out[0] = 0xD800 + (value >> 10u); 191 out[1] = 0xDC00 + (value & 0x3FFu); 192 result = 2; 193 } 194 return result; 195 } 196 197 static Stream 198 arena_stream(Arena *a) 199 { 200 Stream result = {0}; 201 result.data = a->beg; 202 result.cap = a->end - a->beg; 203 a->beg = a->end; 204 return result; 205 } 206 207 static Stream 208 stream_alloc(Arena *a, iz cap) 209 { 210 Stream result = {.cap = cap}; 211 result.data = alloc(a, u8, cap); 212 return result; 213 } 214 215 static s8 216 stream_to_s8(Stream *s) 217 { 218 s8 result = {.len = s->widx, .data = s->data}; 219 return result; 220 } 221 222 static void 223 stream_reset(Stream *s, iz index) 224 { 225 s->errors = s->cap <= index; 226 if (!s->errors) 227 s->widx = index; 228 } 229 230 static void 231 stream_commit(Stream *s, iz count) 232 { 233 s->errors |= !BETWEEN(s->widx + count, 0, s->cap); 234 if (!s->errors) 235 s->widx += count; 236 } 237 238 static void 239 stream_append(Stream *s, void *data, iz count) 240 { 241 s->errors |= (s->cap - s->widx) < count; 242 if (!s->errors) { 243 mem_copy(s->data + s->widx, data, count); 244 s->widx += count; 245 } 246 } 247 248 static void 249 stream_append_byte(Stream *s, u8 b) 250 { 251 stream_append(s, &b, 1); 252 } 253 254 static void 255 stream_pad(Stream *s, u8 b, i32 n) 256 { 257 while (n > 0) stream_append_byte(s, b), n--; 258 } 259 260 static void 261 stream_append_s8(Stream *s, s8 str) 262 { 263 stream_append(s, str.data, str.len); 264 } 265 266 static void 267 stream_append_s8_array(Stream *s, s8 *strs, iz count) 268 { 269 for (iz i = 0; i < count; i++) 270 stream_append(s, strs[i].data, strs[i].len); 271 } 272 273 static void 274 stream_append_u64(Stream *s, u64 n) 275 { 276 u8 tmp[64]; 277 u8 *end = tmp + sizeof(tmp); 278 u8 *beg = end; 279 do { *--beg = '0' + (n % 10); } while (n /= 10); 280 stream_append(s, beg, end - beg); 281 } 282 283 static void 284 stream_append_hex_u64(Stream *s, u64 n) 285 { 286 if (!s->errors) { 287 static u8 hex[16] = {"0123456789abcdef"}; 288 u8 buf[16]; 289 u8 *end = buf + sizeof(buf); 290 u8 *beg = end; 291 while (n) { 292 *--beg = hex[n & 0x0F]; 293 n >>= 4; 294 } 295 while (end - beg < 2) 296 *--beg = '0'; 297 stream_append(s, beg, end - beg); 298 } 299 } 300 301 static void 302 stream_append_i64(Stream *s, i64 n) 303 { 304 if (n < 0) { 305 stream_append_byte(s, '-'); 306 n *= -1; 307 } 308 stream_append_u64(s, n); 309 } 310 311 static void 312 stream_append_f64(Stream *s, f64 f, i64 prec) 313 { 314 if (f < 0) { 315 stream_append_byte(s, '-'); 316 f *= -1; 317 } 318 319 /* NOTE: round last digit */ 320 f += 0.5f / prec; 321 322 if (f >= (f64)(-1UL >> 1)) { 323 stream_append_s8(s, s8("inf")); 324 } else { 325 u64 integral = f; 326 u64 fraction = (f - integral) * prec; 327 stream_append_u64(s, integral); 328 stream_append_byte(s, '.'); 329 for (i64 i = prec / 10; i > 1; i /= 10) { 330 if (i > fraction) 331 stream_append_byte(s, '0'); 332 } 333 stream_append_u64(s, fraction); 334 } 335 } 336 337 static void 338 stream_append_f64_e(Stream *s, f64 f) 339 { 340 /* TODO: there should be a better way of doing this */ 341 #if 0 342 /* NOTE: we ignore subnormal numbers for now */ 343 union { f64 f; u64 u; } u = {.f = f}; 344 i32 exponent = ((u.u >> 52) & 0x7ff) - 1023; 345 f32 log_10_of_2 = 0.301f; 346 i32 scale = (exponent * log_10_of_2); 347 /* NOTE: normalize f */ 348 for (i32 i = ABS(scale); i > 0; i--) 349 f *= (scale > 0)? 0.1f : 10.0f; 350 #else 351 i32 scale = 0; 352 if (f != 0) { 353 while (f > 1) { 354 f *= 0.1f; 355 scale++; 356 } 357 while (f < 1) { 358 f *= 10.0f; 359 scale--; 360 } 361 } 362 #endif 363 364 i32 prec = 100; 365 stream_append_f64(s, f, prec); 366 stream_append_byte(s, 'e'); 367 stream_append_byte(s, scale >= 0? '+' : '-'); 368 for (i32 i = prec / 10; i > 1; i /= 10) 369 stream_append_byte(s, '0'); 370 stream_append_u64(s, ABS(scale)); 371 } 372 373 static void 374 stream_append_v2(Stream *s, v2 v) 375 { 376 stream_append_byte(s, '{'); 377 stream_append_f64(s, v.x, 100); 378 stream_append_s8(s, s8(", ")); 379 stream_append_f64(s, v.y, 100); 380 stream_append_byte(s, '}'); 381 } 382 383 /* NOTE(rnp): FNV-1a hash */ 384 static u64 385 s8_hash(s8 v) 386 { 387 u64 h = 0x3243f6a8885a308d; /* digits of pi */ 388 for (; v.len; v.len--) { 389 h ^= v.data[v.len - 1] & 0xFF; 390 h *= 1111111111111111111; /* random prime */ 391 } 392 return h; 393 } 394 395 static s8 396 c_str_to_s8(char *cstr) 397 { 398 s8 result = {.data = (u8 *)cstr}; 399 if (cstr) { while (*cstr) { result.len++; cstr++; } } 400 return result; 401 } 402 403 /* NOTE(rnp): returns < 0 if byte is not found */ 404 static iz 405 s8_scan_backwards(s8 s, u8 byte) 406 { 407 iz result = s.len; 408 while (result && s.data[result - 1] != byte) result--; 409 result--; 410 return result; 411 } 412 413 static s8 414 s8_cut_head(s8 s, iz cut) 415 { 416 s8 result = s; 417 if (cut > 0) { 418 result.data += cut; 419 result.len -= cut; 420 } 421 return result; 422 } 423 424 static s8 425 s8_alloc(Arena *a, iz len) 426 { 427 return (s8){ .data = alloc(a, u8, len), .len = len }; 428 } 429 430 static s8 431 s16_to_s8(Arena *a, s16 in) 432 { 433 s8 result = {0}; 434 if (in.len) { 435 iz commit = in.len * 4; 436 iz length = 0; 437 u8 *data = arena_commit(a, commit + 1); 438 u16 *beg = in.data; 439 u16 *end = in.data + in.len; 440 while (beg < end) { 441 UnicodeDecode decode = utf16_decode(beg, end - beg); 442 length += utf8_encode(data + length, decode.cp); 443 beg += decode.consumed; 444 } 445 data[length] = 0; 446 result = (s8){.len = length, .data = data}; 447 arena_pop(a, commit - length); 448 } 449 return result; 450 } 451 452 static s16 453 s8_to_s16(Arena *a, s8 in) 454 { 455 s16 result = {0}; 456 if (in.len) { 457 iz required = 2 * in.len + 1; 458 u16 *data = alloc(a, u16, required); 459 iz length = 0; 460 /* TODO(rnp): utf8_decode */ 461 for (iz i = 0; i < in.len; i++) { 462 u32 cp = in.data[i]; 463 length += utf16_encode(data + length, cp); 464 } 465 result = (s16){.len = length, .data = data}; 466 arena_pop(a, required - length); 467 } 468 return result; 469 } 470 471 static s8 472 push_s8(Arena *a, s8 str) 473 { 474 s8 result = s8_alloc(a, str.len); 475 mem_copy(result.data, str.data, result.len); 476 return result; 477 } 478 479 static s8 480 push_s8_zero(Arena *a, s8 str) 481 { 482 s8 result = s8_alloc(a, str.len + 1); 483 result.len -= 1; 484 mem_copy(result.data, str.data, result.len); 485 return result; 486 } 487 488 static u32 489 round_down_power_of_2(u32 a) 490 { 491 u32 result = 0x80000000UL >> clz_u32(a); 492 return result; 493 } 494 495 static b32 496 uv2_equal(uv2 a, uv2 b) 497 { 498 return a.x == b.x && a.y == b.y; 499 } 500 501 static b32 502 uv3_equal(uv3 a, uv3 b) 503 { 504 return a.x == b.x && a.y == b.y && a.z == b.z; 505 } 506 507 static v3 508 cross(v3 a, v3 b) 509 { 510 v3 result = { 511 .x = a.y * b.z - a.z * b.y, 512 .y = a.z * b.x - a.x * b.z, 513 .z = a.x * b.y - a.y * b.x, 514 }; 515 return result; 516 } 517 518 static v3 519 sub_v3(v3 a, v3 b) 520 { 521 v3 result = { 522 .x = a.x - b.x, 523 .y = a.y - b.y, 524 .z = a.z - b.z, 525 }; 526 return result; 527 } 528 529 static f32 530 length_v3(v3 a) 531 { 532 f32 result = a.x * a.x + a.y * a.y + a.z * a.z; 533 return result; 534 } 535 536 static v3 537 normalize_v3(v3 a) 538 { 539 f32 length = length_v3(a); 540 v3 result = {.x = a.x / length, .y = a.y / length, .z = a.z / length}; 541 return result; 542 } 543 544 static v2 545 clamp_v2_rect(v2 v, Rect r) 546 { 547 v2 result = v; 548 result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x); 549 result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y); 550 return result; 551 } 552 553 static v2 554 add_v2(v2 a, v2 b) 555 { 556 v2 result = { 557 .x = a.x + b.x, 558 .y = a.y + b.y, 559 }; 560 return result; 561 } 562 563 static v2 564 sub_v2(v2 a, v2 b) 565 { 566 v2 result = { 567 .x = a.x - b.x, 568 .y = a.y - b.y, 569 }; 570 return result; 571 } 572 573 static v2 574 scale_v2(v2 a, f32 scale) 575 { 576 v2 result = { 577 .x = a.x * scale, 578 .y = a.y * scale, 579 }; 580 return result; 581 } 582 583 static v2 584 mul_v2(v2 a, v2 b) 585 { 586 v2 result = { 587 .x = a.x * b.x, 588 .y = a.y * b.y, 589 }; 590 return result; 591 } 592 593 function v2 594 div_v2(v2 a, v2 b) 595 { 596 v2 result; 597 result.x = a.x / b.x; 598 result.y = a.y / b.y; 599 return result; 600 } 601 602 603 static v2 604 floor_v2(v2 a) 605 { 606 v2 result; 607 result.x = (u32)a.x; 608 result.y = (u32)a.y; 609 return result; 610 } 611 612 static f32 613 magnitude_v2(v2 a) 614 { 615 f32 result = sqrt_f32(a.x * a.x + a.y * a.y); 616 return result; 617 } 618 619 static b32 620 uv4_equal(uv4 a, uv4 b) 621 { 622 return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; 623 } 624 625 static v4 626 sub_v4(v4 a, v4 b) 627 { 628 v4 result; 629 result.x = a.x - b.x; 630 result.y = a.y - b.y; 631 result.z = a.z - b.z; 632 result.w = a.w - b.w; 633 return result; 634 } 635 636 static void 637 split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right) 638 { 639 if (left) { 640 left->pos = rect.pos; 641 left->size.h = rect.size.h; 642 left->size.w = rect.size.w * fraction; 643 } 644 if (right) { 645 right->pos = rect.pos; 646 right->pos.x += rect.size.w * fraction; 647 right->size.h = rect.size.h; 648 right->size.w = rect.size.w * (1.0f - fraction); 649 } 650 } 651 652 static void 653 split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot) 654 { 655 if (top) { 656 top->pos = rect.pos; 657 top->size.w = rect.size.w; 658 top->size.h = rect.size.h * fraction; 659 } 660 if (bot) { 661 bot->pos = rect.pos; 662 bot->pos.y += rect.size.h * fraction; 663 bot->size.w = rect.size.w; 664 bot->size.h = rect.size.h * (1.0f - fraction); 665 } 666 } 667 668 static void 669 cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right) 670 { 671 at = MIN(at, rect.size.w); 672 if (left) { 673 *left = rect; 674 left->size.w = at; 675 } 676 if (right) { 677 *right = rect; 678 right->pos.x += at; 679 right->size.w -= at; 680 } 681 } 682 683 static void 684 cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot) 685 { 686 at = MIN(at, rect.size.h); 687 if (top) { 688 *top = rect; 689 top->size.h = at; 690 } 691 if (bot) { 692 *bot = rect; 693 bot->pos.y += at; 694 bot->size.h -= at; 695 } 696 } 697 698 static f64 699 parse_f64(s8 s) 700 { 701 f64 integral = 0, fractional = 0, sign = 1; 702 703 if (s.len && *s.data == '-') { 704 sign = -1; 705 s.data++; 706 s.len--; 707 } 708 709 while (s.len && *s.data != '.') { 710 integral *= 10; 711 integral += *s.data - '0'; 712 s.data++; 713 s.len--; 714 } 715 716 if (*s.data == '.') { s.data++; s.len--; } 717 718 while (s.len) { 719 ASSERT(s.data[s.len - 1] != '.'); 720 fractional /= 10; 721 fractional += (f64)(s.data[--s.len] - '0') / 10.0; 722 } 723 f64 result = sign * (integral + fractional); 724 return result; 725 } 726 727 static FileWatchDirectory * 728 lookup_file_watch_directory(FileWatchContext *ctx, u64 hash) 729 { 730 FileWatchDirectory *result = 0; 731 732 for (u32 i = 0; i < ctx->directory_watch_count; i++) { 733 FileWatchDirectory *test = ctx->directory_watches + i; 734 if (test->hash == hash) { 735 result = test; 736 break; 737 } 738 } 739 740 return result; 741 } 742 743 static void 744 insert_file_watch(FileWatchDirectory *dir, s8 name, iptr user_data, file_watch_callback *callback) 745 { 746 ASSERT(dir->file_watch_count < ARRAY_COUNT(dir->file_watches)); 747 FileWatch *fw = dir->file_watches + dir->file_watch_count++; 748 fw->hash = s8_hash(name); 749 fw->user_data = user_data; 750 fw->callback = callback; 751 } 752 753 static void 754 fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, uv2 b_dim) 755 { 756 f32x4 vscale = dup_f32x4(scale); 757 for (u32 i = 0; i < b_dim.y; i++) { 758 for (u32 j = 0; j < b_dim.x; j += 4, b += 4) { 759 f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b)); 760 store_i32x4(cvt_f32x4_i32x4(mul_f32x4(vscale, vb)), out + j); 761 } 762 out += out_stride; 763 } 764 } 765 766 /* NOTE: this won't check for valid space/etc and assumes row major order */ 767 static void 768 kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim) 769 { 770 uv2 out_dim = {.x = a_dim.x * b_dim.x, .y = a_dim.y * b_dim.y}; 771 ASSERT(out_dim.y % 4 == 0); 772 for (u32 i = 0; i < a_dim.y; i++) { 773 i32 *vout = out; 774 for (u32 j = 0; j < a_dim.x; j++, a++) { 775 fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim); 776 vout += b_dim.y; 777 } 778 out += out_dim.y * b_dim.x; 779 } 780 } 781 782 /* NOTE/TODO: to support even more hadamard sizes use the Paley construction */ 783 static void 784 fill_hadamard_transpose(i32 *out, i32 *tmp, u32 dim) 785 { 786 ASSERT(dim); 787 b32 power_of_2 = ISPOWEROF2(dim); 788 b32 multiple_of_12 = dim % 12 == 0; 789 790 if (!power_of_2 && !multiple_of_12) 791 return; 792 793 if (!power_of_2) { 794 ASSERT(multiple_of_12); 795 dim /= 12; 796 } 797 798 i32 *m; 799 if (power_of_2) m = out; 800 else m = tmp; 801 802 #define IND(i, j) ((i) * dim + (j)) 803 m[0] = 1; 804 for (u32 k = 1; k < dim; k *= 2) { 805 for (u32 i = 0; i < k; i++) { 806 for (u32 j = 0; j < k; j++) { 807 i32 val = m[IND(i, j)]; 808 m[IND(i + k, j)] = val; 809 m[IND(i, j + k)] = val; 810 m[IND(i + k, j + k)] = -val; 811 } 812 } 813 } 814 #undef IND 815 816 if (!power_of_2) 817 kronecker_product(out, tmp, (uv2){.x = dim, .y = dim}, hadamard_12_12_transpose, 818 (uv2){.x = 12, .y = 12}); 819 }