util.c

/* See LICENSE for license details. */
static i32 hadamard_12_12_transpose[] = {
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,
	1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,
	1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
	1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,
	1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,
	1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,
	1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1,
	1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1,
	1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
	1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,
	1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1,
};

#define zero_struct(s) mem_clear(s, 0, sizeof(*s))
static void *
mem_clear(void *p_, u8 c, iz size)
{
	u8 *p = p_;
	while (size > 0) p[--size] = c;
	return p;
}

static void
mem_copy(void *restrict dest, void *restrict src, uz n)
{
	ASSERT(n >= 0);
	u8 *s = src, *d = dest;
	for (; n; n--) *d++ = *s++;
}

static void
mem_move(u8 *dest, u8 *src, iz n)
{
	if (dest < src) mem_copy(dest, src, n);
	else while (n) { n--; dest[n] = src[n]; }
}

static u8 *
arena_commit(Arena *a, iz size)
{
	ASSERT(a->end - a->beg >= size);
	u8 *result = a->beg;
	a->beg += size;
	return result;
}

static void
arena_pop(Arena *a, iz length)
{
	a->beg -= length;
}

#define alloc(a, t, n)    (t *)alloc_(a, sizeof(t), _Alignof(t), n)
#define push_struct(a, t) (t *)alloc_(a, sizeof(t), _Alignof(t), 1)
static void *
alloc_(Arena *a, iz len, iz align, iz count)
{
	/* NOTE: special case 0 arena */
	if (a->beg == 0)
		return 0;

	iz padding   = -(uintptr_t)a->beg & (align - 1);
	iz available = a->end - a->beg - padding;
	if (available < 0 || count > available / len)
		ASSERT(0 && "arena OOM\n");
	void *p  = a->beg + padding;
	a->beg  += padding + count * len;
	/* TODO: Performance? */
	return mem_clear(p, 0, count * len);
}
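
/* Example (illustrative sketch; `Point` and `arena` are hypothetical):
 * scratch allocations are scoped with a temporary arena (defined below)
 * and released in one shot:
 *
 *	TempArena scratch = begin_temp_arena(&arena);
 *	Point *points = alloc(&arena, Point, 64); // zeroed and aligned
 *	// ... use points ...
 *	end_temp_arena(scratch);                  // drops everything since begin
 */
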
static Arena
sub_arena(Arena *a, iz len, iz align)
{
	Arena result = {0};

	iz padding = -(uintptr_t)a->beg & (align - 1);
	result.beg = a->beg + padding;
	result.end = result.beg + len;
	arena_commit(a, len + padding);

	return result;
}

static TempArena
begin_temp_arena(Arena *a)
{
	TempArena result = {.arena = a, .old_beg = a->beg};
	return result;
}

static void
end_temp_arena(TempArena ta)
{
	Arena *a = ta.arena;
	if (a) {
		ASSERT(a->beg >= ta.old_beg);
		a->beg = ta.old_beg;
	}
}

static u32
utf8_encode(u8 *out, u32 cp)
{
	u32 result = 1;
	if (cp <= 0x7F) {
		out[0] = cp & 0x7F;
	} else if (cp <= 0x7FF) {
		result = 2;
		out[0] = ((cp >>  6) & 0x1F) | 0xC0;
		out[1] = ((cp >>  0) & 0x3F) | 0x80;
	} else if (cp <= 0xFFFF) {
		result = 3;
		out[0] = ((cp >> 12) & 0x0F) | 0xE0;
		out[1] = ((cp >>  6) & 0x3F) | 0x80;
		out[2] = ((cp >>  0) & 0x3F) | 0x80;
	} else if (cp <= 0x10FFFF) {
		result = 4;
		out[0] = ((cp >> 18) & 0x07) | 0xF0;
		out[1] = ((cp >> 12) & 0x3F) | 0x80;
		out[2] = ((cp >>  6) & 0x3F) | 0x80;
		out[3] = ((cp >>  0) & 0x3F) | 0x80;
	} else {
		out[0] = '?';
	}
	return result;
}

static UnicodeDecode
utf16_decode(u16 *data, iz length)
{
	UnicodeDecode result = {.cp = U32_MAX};
	if (length) {
		result.consumed = 1;
		result.cp = data[0];
		if (length > 1 && BETWEEN(data[0], 0xD800, 0xDBFF)
		    && BETWEEN(data[1], 0xDC00, 0xDFFF))
		{
			result.consumed = 2;
			/* NOTE: add the 0x10000 offset outside the merged surrogate
			 * bits; OR-ing it into the low half drops the carry when the
			 * high surrogate's bit 6 is set */
			result.cp = 0x10000 + (((data[0] - 0xD800) << 10) | (data[1] - 0xDC00));
		}
	}
	return result;
}

static u32
utf16_encode(u16 *out, u32 cp)
{
	u32 result = 1;
	if (cp == U32_MAX) {
		out[0] = '?';
	} else if (cp < 0x10000) {
		out[0] = cp;
	} else {
		u32 value = cp - 0x10000;
		out[0] = 0xD800 + (value >> 10u);
		out[1] = 0xDC00 + (value & 0x3FFu);
		result = 2;
	}
	return result;
}

static Stream
arena_stream(Arena *a)
{
	Stream result = {0};
	result.data = a->beg;
	result.cap  = a->end - a->beg;
	a->beg      = a->end;
	return result;
}

static Stream
stream_alloc(Arena *a, iz cap)
{
	Stream result = {.cap = cap};
	result.data = alloc(a, u8, cap);
	return result;
}

static s8
stream_to_s8(Stream *s)
{
	s8 result = {.len = s->widx, .data = s->data};
	return result;
}

static void
stream_reset(Stream *s, iz index)
{
	s->errors = s->cap <= index;
	if (!s->errors)
		s->widx = index;
}

static void
stream_commit(Stream *s, iz count)
{
	s->errors |= !BETWEEN(s->widx + count, 0, s->cap);
	if (!s->errors)
		s->widx += count;
}

static void
stream_append(Stream *s, void *data, iz count)
{
	s->errors |= (s->cap - s->widx) < count;
	if (!s->errors) {
		mem_copy(s->data + s->widx, data, count);
		s->widx += count;
	}
}

static void
stream_append_byte(Stream *s, u8 b)
{
	stream_append(s, &b, 1);
}

static void
stream_append_s8(Stream *s, s8 str)
{
	stream_append(s, str.data, str.len);
}

static void
stream_append_s8_array(Stream *s, s8 *strs, iz count)
{
	for (iz i = 0; i < count; i++)
		stream_append(s, strs[i].data, strs[i].len);
}
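
/* Example (illustrative sketch; `frame_index` and `log_line` are
 * hypothetical): build a line of text in a scratch stream and hand it off
 * as an s8. Errors accumulate in s->errors instead of being checked per
 * call:
 *
 *	Stream s = stream_alloc(&arena, 256);
 *	stream_append_s8(&s, s8("frame "));
 *	stream_append_u64(&s, frame_index);
 *	if (!s.errors) log_line(stream_to_s8(&s));
 */
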
static void
stream_append_u64(Stream *s, u64 n)
{
	u8 tmp[64];
	u8 *end = tmp + sizeof(tmp);
	u8 *beg = end;
	do { *--beg = '0' + (n % 10); } while (n /= 10);
	stream_append(s, beg, end - beg);
}

static void
stream_append_hex_u64(Stream *s, u64 n)
{
	if (!s->errors) {
		static u8 hex[16] = {"0123456789abcdef"};
		u8 buf[16];
		u8 *end = buf + sizeof(buf);
		u8 *beg = end;
		while (n) {
			*--beg = hex[n & 0x0F];
			n >>= 4;
		}
		while (end - beg < 2)
			*--beg = '0';
		stream_append(s, beg, end - beg);
	}
}

static void
stream_append_i64(Stream *s, i64 n)
{
	if (n < 0) {
		stream_append_byte(s, '-');
		n *= -1;
	}
	stream_append_u64(s, n);
}

static void
stream_append_f64(Stream *s, f64 f, i64 prec)
{
	if (f < 0) {
		stream_append_byte(s, '-');
		f *= -1;
	}

	/* NOTE: round last digit */
	f += 0.5f / prec;

	if (f >= (f64)(-1UL >> 1)) {
		stream_append_s8(s, s8("inf"));
	} else {
		u64 integral = f;
		u64 fraction = (f - integral) * prec;
		stream_append_u64(s, integral);
		stream_append_byte(s, '.');
		for (i64 i = prec / 10; i > 1; i /= 10) {
			if (i > fraction)
				stream_append_byte(s, '0');
		}
		stream_append_u64(s, fraction);
	}
}
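
/* Example (illustrative): `prec` is a power of ten selecting the number of
 * fractional digits printed:
 *
 *	stream_append_f64(&s, 3.14159, 100);  // appends "3.14"
 *	stream_append_f64(&s, 0.05,   1000);  // appends "0.050"
 */
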
static void
stream_append_f64_e(Stream *s, f64 f)
{
	/* TODO: there should be a better way of doing this */
#if 0
	/* NOTE: we ignore subnormal numbers for now */
	union { f64 f; u64 u; } u = {.f = f};
	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
	f32 log_10_of_2 = 0.301f;
	i32 scale = (exponent * log_10_of_2);
	/* NOTE: normalize f */
	for (i32 i = ABS(scale); i > 0; i--)
		f *= (scale > 0)? 0.1f : 10.0f;
#else
	i32 scale = 0;
	if (f != 0) {
		while (f > 1) {
			f *= 0.1f;
			scale++;
		}
		while (f < 1) {
			f *= 10.0f;
			scale--;
		}
	}
#endif

	i32 prec = 100;
	stream_append_f64(s, f, prec);
	stream_append_byte(s, 'e');
	stream_append_byte(s, scale >= 0? '+' : '-');
	for (i32 i = prec / 10; i > 1; i /= 10)
		stream_append_byte(s, '0');
	stream_append_u64(s, ABS(scale));
}

static void
stream_append_v2(Stream *s, v2 v)
{
	stream_append_byte(s, '{');
	stream_append_f64(s, v.x, 100);
	stream_append_s8(s, s8(", "));
	stream_append_f64(s, v.y, 100);
	stream_append_byte(s, '}');
}

/* NOTE(rnp): FNV-1a hash */
static u64
s8_hash(s8 v)
{
	u64 h = 0x3243f6a8885a308d; /* digits of pi */
	for (; v.len; v.len--) {
		h ^= v.data[v.len - 1] & 0xFF;
		h *= 1111111111111111111; /* random prime */
	}
	return h;
}

static s8
c_str_to_s8(char *cstr)
{
	s8 result = {.data = (u8 *)cstr};
	while (*cstr) { result.len++; cstr++; }
	return result;
}

/* NOTE(rnp): returns < 0 if byte is not found */
static iz
s8_scan_backwards(s8 s, u8 byte)
{
	iz result = s.len;
	while (result && s.data[result - 1] != byte) result--;
	result--;
	return result;
}

static s8
s8_cut_head(s8 s, iz cut)
{
	s8 result = s;
	if (cut > 0) {
		result.data += cut;
		result.len  -= cut;
	}
	return result;
}

static s8
s8_alloc(Arena *a, iz len)
{
	return (s8){ .data = alloc(a, u8, len), .len = len };
}

static s8
s16_to_s8(Arena *a, s16 in)
{
	s8 result = {0};
	if (in.len) {
		iz commit = in.len * 4;
		iz length = 0;
		u8 *data = arena_commit(a, commit + 1);
		u16 *beg = in.data;
		u16 *end = in.data + in.len;
		while (beg < end) {
			UnicodeDecode decode = utf16_decode(beg, end - beg);
			length += utf8_encode(data + length, decode.cp);
			beg    += decode.consumed;
		}
		data[length] = 0;
		result = (s8){.len = length, .data = data};
		arena_pop(a, commit - length);
	}
	return result;
}

static s16
s8_to_s16(Arena *a, s8 in)
{
	s16 result = {0};
	if (in.len) {
		iz required = 2 * in.len + 1;
		u16 *data = alloc(a, u16, required);
		iz length = 0;
		/* TODO(rnp): utf8_decode */
		for (iz i = 0; i < in.len; i++) {
			u32 cp = in.data[i];
			length += utf16_encode(data + length, cp);
		}
		result = (s16){.len = length, .data = data};
		/* NOTE: arena_pop() takes bytes; the unused tail is
		 * (required - length) u16 elements */
		arena_pop(a, 2 * (required - length));
	}
	return result;
}

static s8
push_s8(Arena *a, s8 str)
{
	s8 result = s8_alloc(a, str.len);
	mem_copy(result.data, str.data, result.len);
	return result;
}

static s8
push_s8_zero(Arena *a, s8 str)
{
	s8 result   = s8_alloc(a, str.len + 1);
	result.len -= 1;
	mem_copy(result.data, str.data, result.len);
	return result;
}
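
/* Example (illustrative): s8_scan_backwards() + s8_cut_head() extract the
 * basename of a path; the returned index points at the matched byte:
 *
 *	s8 path = s8("/tmp/out.bin");
 *	iz sep  = s8_scan_backwards(path, '/');  // -> 4, index of last '/'
 *	s8 name = s8_cut_head(path, sep + 1);    // -> "out.bin"
 */
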
static u32
round_down_power_of_2(u32 a)
{
	u32 result = 0x80000000UL >> clz_u32(a);
	return result;
}

static b32
uv2_equal(uv2 a, uv2 b)
{
	return a.x == b.x && a.y == b.y;
}

static b32
uv3_equal(uv3 a, uv3 b)
{
	return a.x == b.x && a.y == b.y && a.z == b.z;
}

static v3
cross(v3 a, v3 b)
{
	v3 result = {
		.x = a.y * b.z - a.z * b.y,
		.y = a.z * b.x - a.x * b.z,
		.z = a.x * b.y - a.y * b.x,
	};
	return result;
}

static v3
sub_v3(v3 a, v3 b)
{
	v3 result = {
		.x = a.x - b.x,
		.y = a.y - b.y,
		.z = a.z - b.z,
	};
	return result;
}

static f32
length_v3(v3 a)
{
	/* NOTE: without the square root this returns the squared length and
	 * normalize_v3() below divides by the wrong quantity */
	f32 result = sqrt_f32(a.x * a.x + a.y * a.y + a.z * a.z);
	return result;
}

static v3
normalize_v3(v3 a)
{
	f32 length = length_v3(a);
	v3 result = {.x = a.x / length, .y = a.y / length, .z = a.z / length};
	return result;
}

static v2
clamp_v2_rect(v2 v, Rect r)
{
	v2 result = v;
	result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x);
	result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y);
	return result;
}

static v2
add_v2(v2 a, v2 b)
{
	v2 result = {
		.x = a.x + b.x,
		.y = a.y + b.y,
	};
	return result;
}

static v2
sub_v2(v2 a, v2 b)
{
	v2 result = {
		.x = a.x - b.x,
		.y = a.y - b.y,
	};
	return result;
}

static v2
scale_v2(v2 a, f32 scale)
{
	v2 result = {
		.x = a.x * scale,
		.y = a.y * scale,
	};
	return result;
}

static v2
mul_v2(v2 a, v2 b)
{
	v2 result = {
		.x = a.x * b.x,
		.y = a.y * b.y,
	};
	return result;
}

static v2
floor_v2(v2 a)
{
	v2 result;
	result.x = (u32)a.x;
	result.y = (u32)a.y;
	return result;
}

static f32
magnitude_v2(v2 a)
{
	f32 result = sqrt_f32(a.x * a.x + a.y * a.y);
	return result;
}

static b32
uv4_equal(uv4 a, uv4 b)
{
	return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
}

static v4
sub_v4(v4 a, v4 b)
{
	v4 result;
	result.x = a.x - b.x;
	result.y = a.y - b.y;
	result.z = a.z - b.z;
	result.w = a.w - b.w;
	return result;
}

static void
split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right)
{
	if (left) {
		left->pos    = rect.pos;
		left->size.h = rect.size.h;
		left->size.w = rect.size.w * fraction;
	}
	if (right) {
		right->pos    = rect.pos;
		right->pos.x += rect.size.w * fraction;
		right->size.h = rect.size.h;
		right->size.w = rect.size.w * (1.0f - fraction);
	}
}

static void
split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot)
{
	if (top) {
		top->pos    = rect.pos;
		top->size.w = rect.size.w;
		top->size.h = rect.size.h * fraction;
	}
	if (bot) {
		bot->pos    = rect.pos;
		bot->pos.y += rect.size.h * fraction;
		bot->size.w = rect.size.w;
		bot->size.h = rect.size.h * (1.0f - fraction);
	}
}

static void
cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right)
{
	at = MIN(at, rect.size.w);
	if (left) {
		*left = rect;
		left->size.w = at;
	}
	if (right) {
		*right = rect;
		right->pos.x  += at;
		right->size.w -= at;
	}
}

static void
cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot)
{
	at = MIN(at, rect.size.h);
	if (top) {
		*top = rect;
		top->size.h = at;
	}
	if (bot) {
		*bot = rect;
		bot->pos.y  += at;
		bot->size.h -= at;
	}
}

static f64
parse_f64(s8 s)
{
	f64 integral = 0, fractional = 0, sign = 1;

	if (s.len && *s.data == '-') {
		sign = -1;
		s.data++;
		s.len--;
	}

	while (s.len && *s.data != '.') {
		integral *= 10;
		integral += *s.data - '0';
		s.data++;
		s.len--;
	}

	if (s.len && *s.data == '.') { s.data++; s.len--; }

	while (s.len) {
		ASSERT(s.data[s.len - 1] != '.');
		fractional /= 10;
		fractional += (f64)(s.data[--s.len] - '0') / 10.0;
	}
	f64 result = sign * (integral + fractional);
	return result;
}
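
/* Example (illustrative): parse_f64() handles an optional sign, an integral
 * part, and a fractional part; it does not handle exponents:
 *
 *	parse_f64(s8("-12.5"));  // -> -12.5
 *	parse_f64(s8("0.125"));  // ->  0.125
 */
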
static FileWatchDirectory *
lookup_file_watch_directory(FileWatchContext *ctx, u64 hash)
{
	FileWatchDirectory *result = 0;

	for (u32 i = 0; i < ctx->directory_watch_count; i++) {
		FileWatchDirectory *test = ctx->directory_watches + i;
		if (test->hash == hash) {
			result = test;
			break;
		}
	}

	return result;
}

static void
insert_file_watch(FileWatchDirectory *dir, s8 name, iptr user_data, file_watch_callback *callback)
{
	ASSERT(dir->file_watch_count < ARRAY_COUNT(dir->file_watches));
	FileWatch *fw = dir->file_watches + dir->file_watch_count++;
	fw->hash      = s8_hash(name);
	fw->user_data = user_data;
	fw->callback  = callback;
}

static void
fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, uv2 b_dim)
{
	f32x4 vscale = dup_f32x4(scale);
	for (u32 i = 0; i < b_dim.y; i++) {
		for (u32 j = 0; j < b_dim.x; j += 4, b += 4) {
			f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b));
			store_i32x4(cvt_f32x4_i32x4(mul_f32x4(vscale, vb)), out + j);
		}
		out += out_stride;
	}
}

/* NOTE: this won't check for valid space/etc and assumes row major order */
static void
kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim)
{
	uv2 out_dim = {.x = a_dim.x * b_dim.x, .y = a_dim.y * b_dim.y};
	ASSERT(out_dim.y % 4 == 0);
	for (u32 i = 0; i < a_dim.y; i++) {
		i32 *vout = out;
		for (u32 j = 0; j < a_dim.x; j++, a++) {
			fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim);
			vout += b_dim.y;
		}
		out += out_dim.y * b_dim.x;
	}
}

/* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
static void
fill_hadamard_transpose(i32 *out, i32 *tmp, u32 dim)
{
	ASSERT(dim);
	b32 power_of_2     = ISPOWEROF2(dim);
	b32 multiple_of_12 = dim % 12 == 0;

	if (!power_of_2 && !multiple_of_12)
		return;

	if (!power_of_2) {
		ASSERT(multiple_of_12);
		dim /= 12;
	}

	i32 *m;
	if (power_of_2) m = out;
	else            m = tmp;

#define IND(i, j) ((i) * dim + (j))
	m[0] = 1;
	for (u32 k = 1; k < dim; k *= 2) {
		for (u32 i = 0; i < k; i++) {
			for (u32 j = 0; j < k; j++) {
				i32 val = m[IND(i, j)];
				m[IND(i + k, j)]     =  val;
				m[IND(i, j + k)]     =  val;
				m[IND(i + k, j + k)] = -val;
			}
		}
	}
#undef IND

	if (!power_of_2)
		kronecker_product(out, tmp, (uv2){.x = dim, .y = dim},
		                  hadamard_12_12_transpose, (uv2){.x = 12, .y = 12});
}
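
/* Example (illustrative sketch; assumes an Arena as above): supported sizes
 * are powers of two (Sylvester doubling) and 12 * 2^n (Kronecker product of
 * a 2^n Sylvester matrix with the 12x12 seed at the top of this file):
 *
 *	i32 *h   = alloc(&arena, i32, 24 * 24);
 *	i32 *tmp = alloc(&arena, i32, 24 * 24); // scratch for the 2x2 factor
 *	fill_hadamard_transpose(h, tmp, 24);
 */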