util.c (19503B)
/* See LICENSE for license details. */
#define zero_struct(s) mem_clear(s, 0, sizeof(*s))
function void *
mem_clear(void *restrict p_, u8 c, iz size)
{
	u8 *p = p_;
	while (size > 0) p[--size] = c;
	return p;
}

function void
mem_copy(void *restrict dest, void *restrict src, uz n)
{
	u8 *s = src, *d = dest;
	for (; n; n--) *d++ = *s++;
}

function void
mem_move(u8 *dest, u8 *src, iz n)
{
	if (dest < src) mem_copy(dest, src, n);
	else while (n) { n--; dest[n] = src[n]; }
}

function u8 *
arena_commit(Arena *a, iz size)
{
	ASSERT(a->end - a->beg >= size);
	u8 *result = a->beg;
	a->beg += size;
	return result;
}

function void
arena_pop(Arena *a, iz length)
{
	a->beg -= length;
}

#define push_array(a, t, n) (t *)arena_alloc(a, sizeof(t), _Alignof(t), n)
#define push_struct(a, t)   (t *)arena_alloc(a, sizeof(t), _Alignof(t), 1)
function void *
arena_alloc(Arena *a, iz len, iz align, iz count)
{
	/* NOTE: special case 0 arena */
	if (a->beg == 0)
		return 0;

	iz padding   = -(uintptr_t)a->beg & (align - 1);
	iz available = a->end - a->beg - padding;
	if (available < 0 || count > available / len)
		ASSERT(0 && "arena OOM\n");
	void *p = a->beg + padding;
	a->beg += padding + count * len;
	/* TODO: Performance? */
	return mem_clear(p, 0, count * len);
}

#define arena_capacity(a, t) arena_capacity_(a, sizeof(t), _Alignof(t))
function iz
arena_capacity_(Arena *a, iz size, iz alignment)
{
	iz padding   = -(uintptr_t)a->beg & (alignment - 1);
	iz available = a->end - a->beg - padding;
	iz result    = available / size;
	return result;
}

enum { DA_INITIAL_CAP = 4 };
#define da_reserve(a, s, n) \
	(s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \
	                        _Alignof(typeof(*(s)->data)), sizeof(*(s)->data))

#define da_push(a, s) \
	((s)->count == (s)->capacity     \
	  ? da_reserve(a, s, 1),         \
	    (s)->data + (s)->count++     \
	  : (s)->data + (s)->count++)

function void *
da_reserve_(Arena *a, void *data, iz *capacity, iz needed, iz align, iz size)
{
	iz cap = *capacity;

	/* NOTE(rnp): handle both 0 initialized DAs and DAs that need to be moved (they started
	 * on the stack or someone allocated something in the middle of the arena during usage) */
	if (!data || a->beg != (u8 *)data + cap * size) {
		void *copy = arena_alloc(a, size, align, cap);
		if (data) mem_copy(copy, data, cap * size);
		data = copy;
	}

	if (!cap) cap = DA_INITIAL_CAP;
	while (cap < needed) cap *= 2;
	arena_alloc(a, size, align, cap - *capacity);
	*capacity = cap;
	return data;
}
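
/* NOTE: usage sketch for the DA helpers above (hypothetical caller; `arena`
 * is assumed to be a valid Arena obtained elsewhere in the program):
 *
 *   typedef struct { i32 *data; iz count, capacity; } I32List;
 *   I32List list = {0};               // 0-initialized DA is fine
 *   *da_push(&arena, &list) = 42;     // grows capacity 0 -> 4 -> 8 -> ...
 *
 * da_push evaluates to a pointer to the new slot; da_reserve_ transparently
 * relocates the array when something else was bumped onto the arena since
 * the last push. */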

function Arena
sub_arena(Arena *a, iz len, iz align)
{
	Arena result = {0};

	iz padding = -(uintptr_t)a->beg & (align - 1);
	result.beg = a->beg + padding;
	result.end = result.beg + len;
	arena_commit(a, len + padding);

	return result;
}

function TempArena
begin_temp_arena(Arena *a)
{
	TempArena result = {.arena = a, .old_beg = a->beg};
	return result;
}

function void
end_temp_arena(TempArena ta)
{
	Arena *a = ta.arena;
	if (a) {
		assert(a->beg >= ta.old_beg);
		a->beg = ta.old_beg;
	}
}

function u32
utf8_encode(u8 *out, u32 cp)
{
	u32 result = 1;
	if (cp <= 0x7F) {
		out[0] = cp & 0x7F;
	} else if (cp <= 0x7FF) {
		result = 2;
		out[0] = ((cp >>  6) & 0x1F) | 0xC0;
		out[1] = ((cp >>  0) & 0x3F) | 0x80;
	} else if (cp <= 0xFFFF) {
		result = 3;
		out[0] = ((cp >> 12) & 0x0F) | 0xE0;
		out[1] = ((cp >>  6) & 0x3F) | 0x80;
		out[2] = ((cp >>  0) & 0x3F) | 0x80;
	} else if (cp <= 0x10FFFF) {
		result = 4;
		out[0] = ((cp >> 18) & 0x07) | 0xF0;
		out[1] = ((cp >> 12) & 0x3F) | 0x80;
		out[2] = ((cp >>  6) & 0x3F) | 0x80;
		out[3] = ((cp >>  0) & 0x3F) | 0x80;
	} else {
		out[0] = '?';
	}
	return result;
}

function UnicodeDecode
utf16_decode(u16 *data, iz length)
{
	UnicodeDecode result = {.cp = U32_MAX};
	if (length) {
		result.consumed = 1;
		result.cp = data[0];
		if (length > 1 && BETWEEN(data[0], 0xD800, 0xDBFF)
		               && BETWEEN(data[1], 0xDC00, 0xDFFF))
		{
			result.consumed = 2;
			/* NOTE: the 0x10000 offset must be added after combining both
			 * surrogates; adding it to the low half alone ORs into bit 16,
			 * which collides with the lead surrogate's bits for cp >= U+20000 */
			result.cp = (((data[0] - 0xD800) << 10) | (data[1] - 0xDC00)) + 0x10000;
		}
	}
	return result;
}

function u32
utf16_encode(u16 *out, u32 cp)
{
	u32 result = 1;
	if (cp == U32_MAX) {
		out[0] = '?';
	} else if (cp < 0x10000) {
		out[0] = cp;
	} else {
		u32 value = cp - 0x10000;
		out[0] = 0xD800 + (value >> 10u);
		out[1] = 0xDC00 + (value & 0x3FFu);
		result = 2;
	}
	return result;
}
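
/* NOTE: worked example for the pair above: U+1F600 is the surrogate pair
 * 0xD83D 0xDE00, and utf16_decode recovers
 *   (((0xD83D - 0xD800) << 10) | (0xDE00 - 0xDC00)) + 0x10000
 *   = ((0x3D << 10) | 0x200) + 0x10000 = 0xF600 + 0x10000 = 0x1F600,
 * while utf16_encode(out, 0x1F600) reproduces 0xD83D 0xDE00 from
 * value = 0xF600: 0xD800 + (0xF600 >> 10) and 0xDC00 + (0xF600 & 0x3FF). */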

function Stream
stream_alloc(Arena *a, iz cap)
{
	Stream result = {.cap = cap};
	result.data = push_array(a, u8, cap);
	return result;
}

function s8
stream_to_s8(Stream *s)
{
	s8 result = s8("");
	if (!s->errors) result = (s8){.len = s->widx, .data = s->data};
	return result;
}

function void
stream_reset(Stream *s, iz index)
{
	s->errors = s->cap <= index;
	if (!s->errors)
		s->widx = index;
}

function void
stream_commit(Stream *s, iz count)
{
	s->errors |= !BETWEEN(s->widx + count, 0, s->cap);
	if (!s->errors)
		s->widx += count;
}

function void
stream_append(Stream *s, void *data, iz count)
{
	s->errors |= (s->cap - s->widx) < count;
	if (!s->errors) {
		mem_copy(s->data + s->widx, data, count);
		s->widx += count;
	}
}

function void
stream_append_byte(Stream *s, u8 b)
{
	stream_append(s, &b, 1);
}

function void
stream_pad(Stream *s, u8 b, i32 n)
{
	while (n > 0) stream_append_byte(s, b), n--;
}

function void
stream_append_s8(Stream *s, s8 str)
{
	stream_append(s, str.data, str.len);
}

#define stream_append_s8s(s, ...) stream_append_s8s_(s, arg_list(s8, ##__VA_ARGS__))
function void
stream_append_s8s_(Stream *s, s8 *strs, iz count)
{
	for (iz i = 0; i < count; i++)
		stream_append(s, strs[i].data, strs[i].len);
}

function void
stream_append_u64(Stream *s, u64 n)
{
	u8 tmp[64];
	u8 *end = tmp + sizeof(tmp);
	u8 *beg = end;
	do { *--beg = '0' + (n % 10); } while (n /= 10);
	stream_append(s, beg, end - beg);
}

function void
stream_append_hex_u64(Stream *s, u64 n)
{
	if (!s->errors) {
		u8  buf[16];
		u8 *end = buf + sizeof(buf);
		u8 *beg = end;
		while (n) {
			*--beg = "0123456789abcdef"[n & 0x0F];
			n >>= 4;
		}
		while (end - beg < 2)
			*--beg = '0';
		stream_append(s, beg, end - beg);
	}
}

function void
stream_append_i64(Stream *s, i64 n)
{
	if (n < 0) {
		stream_append_byte(s, '-');
		n *= -1;
	}
	stream_append_u64(s, n);
}

function void
stream_append_f64(Stream *s, f64 f, i64 prec)
{
	if (f < 0) {
		stream_append_byte(s, '-');
		f *= -1;
	}

	/* NOTE: round last digit */
	f += 0.5f / prec;

	if (f >= (f64)(-1UL >> 1)) {
		stream_append_s8(s, s8("inf"));
	} else {
		u64 integral = f;
		u64 fraction = (f - integral) * prec;
		stream_append_u64(s, integral);
		stream_append_byte(s, '.');
		for (i64 i = prec / 10; i > 1; i /= 10) {
			if (i > fraction)
				stream_append_byte(s, '0');
		}
		stream_append_u64(s, fraction);
	}
}

function void
stream_append_f64_e(Stream *s, f64 f)
{
	/* TODO: there should be a better way of doing this */
#if 0
	/* NOTE: we ignore subnormal numbers for now */
	union { f64 f; u64 u; } u = {.f = f};
	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
	f32 log_10_of_2 = 0.301f;
	i32 scale = (exponent * log_10_of_2);
	/* NOTE: normalize f */
	for (i32 i = ABS(scale); i > 0; i--)
		f *= (scale > 0)? 0.1f : 10.0f;
#else
	i32 scale = 0;
	if (f != 0) {
		while (f > 1) {
			f *= 0.1f;
			scale++;
		}
		while (f < 1) {
			f *= 10.0f;
			scale--;
		}
	}
#endif

	i32 prec = 100;
	stream_append_f64(s, f, prec);
	stream_append_byte(s, 'e');
	stream_append_byte(s, scale >= 0? '+' : '-');
	for (i32 i = prec / 10; i > 1; i /= 10)
		stream_append_byte(s, '0');
	stream_append_u64(s, ABS(scale));
}
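
/* NOTE: in stream_append_f64, `prec` is a power of ten selecting the digit
 * count, not a digit count itself: stream_append_f64(s, 3.14159, 100) first
 * rounds (f += 0.5 / 100), then emits the integral part 3, '.', and the
 * fraction (u64)(0.14659 * 100) == 14, giving "3.14". stream_append_f64_e
 * always uses prec == 100, so the same value prints as "3.14e+00". */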

function void
stream_append_v2(Stream *s, v2 v)
{
	stream_append_byte(s, '{');
	stream_append_f64(s, v.x, 100);
	stream_append_s8(s, s8(", "));
	stream_append_f64(s, v.y, 100);
	stream_append_byte(s, '}');
}

function Stream
arena_stream(Arena a)
{
	Stream result = {0};
	result.data = a.beg;
	result.cap  = a.end - a.beg;
	return result;
}

function s8
arena_stream_commit(Arena *a, Stream *s)
{
	ASSERT(s->data == a->beg);
	s8 result = stream_to_s8(s);
	arena_commit(a, result.len);
	return result;
}

function s8
arena_stream_commit_zero(Arena *a, Stream *s)
{
	b32 error = s->errors || s->widx == s->cap;
	if (!error)
		s->data[s->widx] = 0;
	s8 result = stream_to_s8(s);
	arena_commit(a, result.len + 1);
	return result;
}
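
/* NOTE: sketch of how s8_hash below is meant to be paired with the lookup
 * further down (hypothetical key and context variable; `ctx` is assumed to
 * be a populated FileWatchContext *):
 *
 *   u64 dir_hash = s8_hash(s8("shaders"));
 *   FileWatchDirectory *dir = lookup_file_watch_directory(ctx, dir_hash);
 *
 * i.e. hash once at registration, compare raw u64s on every lookup. */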

/* NOTE(rnp): FNV-1a hash */
function u64
s8_hash(s8 v)
{
	u64 h = 0x3243f6a8885a308d; /* digits of pi */
	for (; v.len; v.len--) {
		h ^= v.data[v.len - 1] & 0xFF;
		h *= 1111111111111111111; /* random prime */
	}
	return h;
}

function s8
c_str_to_s8(char *cstr)
{
	s8 result = {.data = (u8 *)cstr};
	if (cstr) { while (*cstr) { result.len++; cstr++; } }
	return result;
}

/* NOTE(rnp): returns < 0 if byte is not found */
function iz
s8_scan_backwards(s8 s, u8 byte)
{
	iz result = s.len;
	while (result && s.data[result - 1] != byte) result--;
	result--;
	return result;
}

function s8
s8_cut_head(s8 s, iz cut)
{
	s8 result = s;
	if (cut > 0) {
		result.data += cut;
		result.len  -= cut;
	}
	return result;
}

function s8
s8_alloc(Arena *a, iz len)
{
	s8 result = {.data = push_array(a, u8, len), .len = len};
	return result;
}

function s8
s16_to_s8(Arena *a, s16 in)
{
	s8 result = s8("");
	if (in.len) {
		iz commit = in.len * 4;
		iz length = 0;
		u8 *data = arena_commit(a, commit + 1);
		u16 *beg = in.data;
		u16 *end = in.data + in.len;
		while (beg < end) {
			UnicodeDecode decode = utf16_decode(beg, end - beg);
			length += utf8_encode(data + length, decode.cp);
			beg    += decode.consumed;
		}
		data[length] = 0;
		result = (s8){.len = length, .data = data};
		arena_pop(a, commit - length);
	}
	return result;
}

function s16
s8_to_s16(Arena *a, s8 in)
{
	s16 result = {0};
	if (in.len) {
		iz required = 2 * in.len + 1;
		u16 *data   = push_array(a, u16, required);
		iz length   = 0;
		/* TODO(rnp): utf8_decode */
		for (iz i = 0; i < in.len; i++) {
			u32 cp  = in.data[i];
			length += utf16_encode(data + length, cp);
		}
		result = (s16){.len = length, .data = data};
		/* NOTE: arena_pop works in bytes, so scale the unused u16 slots */
		arena_pop(a, (required - length) * (iz)sizeof(u16));
	}
	return result;
}

function s8
push_s8(Arena *a, s8 str)
{
	s8 result = s8_alloc(a, str.len);
	mem_copy(result.data, str.data, result.len);
	return result;
}

function s8
push_s8_zero(Arena *a, s8 str)
{
	s8 result   = s8_alloc(a, str.len + 1);
	result.len -= 1;
	mem_copy(result.data, str.data, result.len);
	return result;
}

function u32
round_down_power_of_2(u32 a)
{
	u32 result = 0x80000000UL >> clz_u32(a);
	return result;
}

function u32
round_up_power_of_2(u32 a)
{
	u32 result = 0x80000000UL >> (clz_u32(a - 1) - 1);
	return result;
}

function iz
round_up_to(iz value, iz multiple)
{
	iz result = value;
	if (value % multiple != 0)
		result += multiple - value % multiple;
	return result;
}
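
/* NOTE: examples for the power-of-2 helpers above:
 *   round_down_power_of_2(12) == 8   (clz_u32(12) == 28, 0x80000000 >> 28)
 *   round_up_power_of_2(12)   == 16  (clz_u32(11) == 28, 0x80000000 >> 27)
 * round_up_power_of_2(1) additionally assumes clz_u32(0) is defined as 32
 * here, since a count-leading-zeros of 0 is otherwise implementation
 * defined. */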

function b32
uv2_equal(uv2 a, uv2 b)
{
	return a.x == b.x && a.y == b.y;
}

function b32
uv3_equal(uv3 a, uv3 b)
{
	return a.x == b.x && a.y == b.y && a.z == b.z;
}

function v3
cross(v3 a, v3 b)
{
	v3 result = {
		.x = a.y * b.z - a.z * b.y,
		.y = a.z * b.x - a.x * b.z,
		.z = a.x * b.y - a.y * b.x,
	};
	return result;
}

function v3
sub_v3(v3 a, v3 b)
{
	v3 result = {
		.x = a.x - b.x,
		.y = a.y - b.y,
		.z = a.z - b.z,
	};
	return result;
}

/* NOTE: returns the squared length; take sqrt_f32 for the true magnitude */
function f32
length_v3(v3 a)
{
	f32 result = a.x * a.x + a.y * a.y + a.z * a.z;
	return result;
}

function v3
normalize_v3(v3 a)
{
	f32 length = sqrt_f32(length_v3(a));
	v3 result = {.x = a.x / length, .y = a.y / length, .z = a.z / length};
	return result;
}

function v2
clamp_v2_rect(v2 v, Rect r)
{
	v2 result = v;
	result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x);
	result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y);
	return result;
}

function v2
add_v2(v2 a, v2 b)
{
	v2 result = {
		.x = a.x + b.x,
		.y = a.y + b.y,
	};
	return result;
}

function v2
sub_v2(v2 a, v2 b)
{
	v2 result = {
		.x = a.x - b.x,
		.y = a.y - b.y,
	};
	return result;
}

function v2
scale_v2(v2 a, f32 scale)
{
	v2 result = {
		.x = a.x * scale,
		.y = a.y * scale,
	};
	return result;
}

function v2
mul_v2(v2 a, v2 b)
{
	v2 result = {
		.x = a.x * b.x,
		.y = a.y * b.y,
	};
	return result;
}

function v2
div_v2(v2 a, v2 b)
{
	v2 result;
	result.x = a.x / b.x;
	result.y = a.y / b.y;
	return result;
}

function v2
floor_v2(v2 a)
{
	v2 result;
	result.x = (i32)a.x;
	result.y = (i32)a.y;
	return result;
}

function f32
magnitude_v2(v2 a)
{
	f32 result = sqrt_f32(a.x * a.x + a.y * a.y);
	return result;
}

function uv4
uv4_from_u32_array(u32 v[4])
{
	uv4 result;
	result.E[0] = v[0];
	result.E[1] = v[1];
	result.E[2] = v[2];
	result.E[3] = v[3];
	return result;
}

function b32
uv4_equal(uv4 a, uv4 b)
{
	return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
}

function v4
v4_from_f32_array(f32 v[4])
{
	v4 result;
	result.E[0] = v[0];
	result.E[1] = v[1];
	result.E[2] = v[2];
	result.E[3] = v[3];
	return result;
}

function v4
sub_v4(v4 a, v4 b)
{
	v4 result;
	result.x = a.x - b.x;
	result.y = a.y - b.y;
	result.z = a.z - b.z;
	result.w = a.w - b.w;
	return result;
}

function void
split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right)
{
	if (left) {
		left->pos    = rect.pos;
		left->size.h = rect.size.h;
		left->size.w = rect.size.w * fraction;
	}
	if (right) {
		right->pos    = rect.pos;
		right->pos.x += rect.size.w * fraction;
		right->size.h = rect.size.h;
		right->size.w = rect.size.w * (1.0f - fraction);
	}
}

function void
split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot)
{
	if (top) {
		top->pos    = rect.pos;
		top->size.w = rect.size.w;
		top->size.h = rect.size.h * fraction;
	}
	if (bot) {
		bot->pos    = rect.pos;
		bot->pos.y += rect.size.h * fraction;
		bot->size.w = rect.size.w;
		bot->size.h = rect.size.h * (1.0f - fraction);
	}
}

function void
cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right)
{
	at = MIN(at, rect.size.w);
	if (left) {
		*left = rect;
		left->size.w = at;
	}
	if (right) {
		*right = rect;
		right->pos.x  += at;
		right->size.w -= at;
	}
}

function void
cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot)
{
	at = MIN(at, rect.size.h);
	if (top) {
		*top = rect;
		top->size.h = at;
	}
	if (bot) {
		*bot = rect;
		bot->pos.y  += at;
		bot->size.h -= at;
	}
}

function f64
parse_f64(s8 s)
{
	f64 integral = 0, fractional = 0, sign = 1;

	if (s.len && *s.data == '-') {
		sign = -1;
		s.data++;
		s.len--;
	}

	while (s.len && *s.data != '.') {
		integral *= 10;
		integral += *s.data - '0';
		s.data++;
		s.len--;
	}

	/* NOTE: guard the length before dereferencing; the integral loop may
	 * have consumed the whole string */
	if (s.len && *s.data == '.') { s.data++; s.len--; }

	while (s.len) {
		ASSERT(s.data[s.len - 1] != '.');
		fractional /= 10;
		fractional += (f64)(s.data[--s.len] - '0') / 10.0;
	}
	f64 result = sign * (integral + fractional);
	return result;
}

function FileWatchDirectory *
lookup_file_watch_directory(FileWatchContext *ctx, u64 hash)
{
	FileWatchDirectory *result = 0;
	for (u32 i = 0; i < ctx->count; i++) {
		FileWatchDirectory *test = ctx->data + i;
		if (test->hash == hash) {
			result = test;
			break;
		}
	}
	return result;
}
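
/* NOTE: Kronecker product refresher for the two routines below: for A (m x n)
 * and B (p x q), A (x) B is (m*p x n*q) and block (i,j) equals A[i][j] * B.
 * With A = [1 1; 1 -1] this is exactly one Sylvester doubling step:
 *
 *   [1  1]          [ B  B]
 *   [1 -1] (x) B  = [ B -B]
 *
 * which is how make_hadamard_transpose extends the 12x12/20x20 seed matrices
 * to larger sizes. */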

function void
fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, uv2 b_dim)
{
	f32x4 vscale = dup_f32x4(scale);
	for (u32 i = 0; i < b_dim.y; i++) {
		for (u32 j = 0; j < b_dim.x; j += 4, b += 4) {
			f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b));
			store_i32x4(cvt_f32x4_i32x4(mul_f32x4(vscale, vb)), out + j);
		}
		out += out_stride;
	}
}

/* NOTE: this won't check for valid space/etc and assumes row major order */
function void
kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim)
{
	uv2 out_dim = {.x = a_dim.x * b_dim.x, .y = a_dim.y * b_dim.y};
	ASSERT(out_dim.y % 4 == 0);
	for (u32 i = 0; i < a_dim.y; i++) {
		i32 *vout = out;
		for (u32 j = 0; j < a_dim.x; j++, a++) {
			fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim);
			vout += b_dim.y;
		}
		out += out_dim.y * b_dim.x;
	}
}

/* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
function i32 *
make_hadamard_transpose(Arena *a, u32 dim)
{
	read_only local_persist i32 hadamard_12_12_transpose[] = {
		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
		1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,
		1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,
		1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
		1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,
		1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,
		1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,
		1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1,
		1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1,
		1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
		1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,
		1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1,
	};

	read_only local_persist i32 hadamard_20_20_transpose[] = {
		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
		1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1,
		1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1,
		1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,
		1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,
		1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1,
		1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1,
		1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1,
		1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1,
		1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,
		1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1,
		1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,
		1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1,
		1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,
		1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,
		1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,
		1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,
		1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1,
		1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1,
		1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,
	};

	i32 *result = 0;

	b32 power_of_2     = ISPOWEROF2(dim);
	b32 multiple_of_12 = dim % 12 == 0;
	b32 multiple_of_20 = dim % 20 == 0;
	iz  elements       = dim * dim;

	u32 base_dim = 0;
	if (power_of_2) {
		base_dim = dim;
	} else if (multiple_of_20 && ISPOWEROF2(dim / 20)) {
		base_dim  = 20;
		dim      /= 20;
	} else if (multiple_of_12 && ISPOWEROF2(dim / 12)) {
		base_dim  = 12;
		dim      /= 12;
	}

	if (ISPOWEROF2(dim) && base_dim && arena_capacity(a, i32) >= elements * (1 + (dim != base_dim))) {
		result = push_array(a, i32, elements);

		Arena tmp = *a;
		i32 *m = dim == base_dim ? result : push_array(&tmp, i32, elements);

		#define IND(i, j) ((i) * dim + (j))
		m[0] = 1;
		for (u32 k = 1; k < dim; k *= 2) {
			for (u32 i = 0; i < k; i++) {
				for (u32 j = 0; j < k; j++) {
					i32 val = m[IND(i, j)];
					m[IND(i + k, j)]     =  val;
					m[IND(i, j + k)]     =  val;
					m[IND(i + k, j + k)] = -val;
				}
			}
		}
		#undef IND

		i32 *m2 = 0;
		uv2 m2_dim;
		switch (base_dim) {
		case 12:{ m2 = hadamard_12_12_transpose; m2_dim = (uv2){{12, 12}}; }break;
		case 20:{ m2 = hadamard_20_20_transpose; m2_dim = (uv2){{20, 20}}; }break;
		}
		if (m2) kronecker_product(result, m, (uv2){{dim, dim}}, m2, m2_dim);
	}

	return result;
}
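
/* NOTE: usage sketch (hypothetical caller; `arena` assumed to be a valid
 * Arena). Supported dims factor as 2^n, 2^n * 12, or 2^n * 20; anything
 * else yields 0:
 *
 *   i32 *h8  = make_hadamard_transpose(&arena, 8);   // pure Sylvester
 *   i32 *h24 = make_hadamard_transpose(&arena, 24);  // 2x2 Sylvester (x) 12x12 seed
 *   i32 *bad = make_hadamard_transpose(&arena, 7);   // 0: unsupported size
 */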