util.c (17163B)
1 /* See LICENSE for license details. */ 2 static i32 hadamard_12_12_transpose[] = { 3 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, 5 1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 6 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, 7 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 8 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 9 1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 10 1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, 11 1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, 12 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, 13 1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 14 1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, 15 }; 16 17 #define zero_struct(s) mem_clear(s, 0, sizeof(*s)) 18 static void * 19 mem_clear(void *p_, u8 c, iz size) 20 { 21 u8 *p = p_; 22 while (size > 0) p[--size] = c; 23 return p; 24 } 25 26 static void 27 mem_copy(void *restrict dest, void *restrict src, uz n) 28 { 29 u8 *s = src, *d = dest; 30 for (; n; n--) *d++ = *s++; 31 } 32 33 static void 34 mem_move(u8 *dest, u8 *src, iz n) 35 { 36 if (dest < src) mem_copy(dest, src, n); 37 else while (n) { n--; dest[n] = src[n]; } 38 } 39 40 41 static u8 * 42 arena_commit(Arena *a, iz size) 43 { 44 ASSERT(a->end - a->beg >= size); 45 u8 *result = a->beg; 46 a->beg += size; 47 return result; 48 } 49 50 static void 51 arena_pop(Arena *a, iz length) 52 { 53 a->beg -= length; 54 } 55 56 #define push_array(a, t, n) (t *)arena_alloc(a, sizeof(t), _Alignof(t), n) 57 #define push_struct(a, t) (t *)arena_alloc(a, sizeof(t), _Alignof(t), 1) 58 function void * 59 arena_alloc(Arena *a, iz len, iz align, iz count) 60 { 61 /* NOTE: special case 0 arena */ 62 if (a->beg == 0) 63 return 0; 64 65 iz padding = -(uintptr_t)a->beg & (align - 1); 66 iz available = a->end - a->beg - padding; 67 if (available < 0 || count > available / len) 68 ASSERT(0 && "arena OOM\n"); 69 void *p = a->beg + padding; 70 a->beg += padding + count * len; 71 /* TODO: Performance? */ 72 return mem_clear(p, 0, count * len); 73 } 74 75 #define arena_capacity(a, t) arena_capacity_(a, sizeof(t), _Alignof(t)) 76 function iz 77 arena_capacity_(Arena *a, iz size, iz alignment) 78 { 79 iz padding = -(uintptr_t)a->beg & (alignment - 1); 80 iz available = a->end - a->beg - padding; 81 iz result = available / size; 82 return result; 83 } 84 85 enum { DA_INITIAL_CAP = 4 }; 86 #define da_reserve(a, s, n) \ 87 (s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \ 88 _Alignof(typeof(*(s)->data)), sizeof(*(s)->data)) 89 90 #define da_push(a, s) \ 91 ((s)->count == (s)->capacity \ 92 ? da_reserve(a, s, 1), \ 93 (s)->data + (s)->count++ \ 94 : (s)->data + (s)->count++) 95 96 static void * 97 da_reserve_(Arena *a, void *data, iz *capacity, iz needed, iz align, iz size) 98 { 99 iz cap = *capacity; 100 101 /* NOTE(rnp): handle both 0 initialized DAs and DAs that need to be moved (they started 102 * on the stack or someone allocated something in the middle of the arena during usage) */ 103 if (!data || a->beg != (u8 *)data + cap * size) { 104 void *copy = arena_alloc(a, size, align, cap); 105 if (data) mem_copy(copy, data, cap * size); 106 data = copy; 107 } 108 109 if (!cap) cap = DA_INITIAL_CAP; 110 while (cap < needed) cap *= 2; 111 arena_alloc(a, size, align, cap - *capacity); 112 *capacity = cap; 113 return data; 114 } 115 116 static Arena 117 sub_arena(Arena *a, iz len, iz align) 118 { 119 Arena result = {0}; 120 121 iz padding = -(uintptr_t)a->beg & (align - 1); 122 result.beg = a->beg + padding; 123 result.end = result.beg + len; 124 arena_commit(a, len + padding); 125 126 return result; 127 } 128 129 static TempArena 130 begin_temp_arena(Arena *a) 131 { 132 TempArena result = {.arena = a, .old_beg = a->beg}; 133 return result; 134 } 135 136 static void 137 end_temp_arena(TempArena ta) 138 { 139 Arena *a = ta.arena; 140 if (a) { 141 ASSERT(a->beg >= ta.old_beg) 142 a->beg = ta.old_beg; 143 } 144 } 145 146 static u32 147 utf8_encode(u8 *out, u32 cp) 148 { 149 u32 result = 1; 150 if (cp <= 0x7F) { 151 out[0] = cp & 0x7F; 152 } else if (cp <= 0x7FF) { 153 result = 2; 154 out[0] = ((cp >> 6) & 0x1F) | 0xC0; 155 out[1] = ((cp >> 0) & 0x3F) | 0x80; 156 } else if (cp <= 0xFFFF) { 157 result = 3; 158 out[0] = ((cp >> 12) & 0x0F) | 0xE0; 159 out[1] = ((cp >> 6) & 0x3F) | 0x80; 160 out[2] = ((cp >> 0) & 0x3F) | 0x80; 161 } else if (cp <= 0x10FFFF) { 162 result = 4; 163 out[0] = ((cp >> 18) & 0x07) | 0xF0; 164 out[1] = ((cp >> 12) & 0x3F) | 0x80; 165 out[2] = ((cp >> 6) & 0x3F) | 0x80; 166 out[3] = ((cp >> 0) & 0x3F) | 0x80; 167 } else { 168 out[0] = '?'; 169 } 170 return result; 171 } 172 173 static UnicodeDecode 174 utf16_decode(u16 *data, iz length) 175 { 176 UnicodeDecode result = {.cp = U32_MAX}; 177 if (length) { 178 result.consumed = 1; 179 result.cp = data[0]; 180 if (length > 1 && BETWEEN(data[0], 0xD800, 0xDBFF) 181 && BETWEEN(data[1], 0xDC00, 0xDFFF)) 182 { 183 result.consumed = 2; 184 result.cp = ((data[0] - 0xD800) << 10) | ((data[1] - 0xDC00) + 0x10000); 185 } 186 } 187 return result; 188 } 189 190 static u32 191 utf16_encode(u16 *out, u32 cp) 192 { 193 u32 result = 1; 194 if (cp == U32_MAX) { 195 out[0] = '?'; 196 } else if (cp < 0x10000) { 197 out[0] = cp; 198 } else { 199 u32 value = cp - 0x10000; 200 out[0] = 0xD800 + (value >> 10u); 201 out[1] = 0xDC00 + (value & 0x3FFu); 202 result = 2; 203 } 204 return result; 205 } 206 207 function Stream 208 stream_alloc(Arena *a, iz cap) 209 { 210 Stream result = {.cap = cap}; 211 result.data = push_array(a, u8, cap); 212 return result; 213 } 214 215 function s8 216 stream_to_s8(Stream *s) 217 { 218 s8 result = s8(""); 219 if (!s->errors) result = (s8){.len = s->widx, .data = s->data}; 220 return result; 221 } 222 223 function void 224 stream_reset(Stream *s, iz index) 225 { 226 s->errors = s->cap <= index; 227 if (!s->errors) 228 s->widx = index; 229 } 230 231 static void 232 stream_commit(Stream *s, iz count) 233 { 234 s->errors |= !BETWEEN(s->widx + count, 0, s->cap); 235 if (!s->errors) 236 s->widx += count; 237 } 238 239 static void 240 stream_append(Stream *s, void *data, iz count) 241 { 242 s->errors |= (s->cap - s->widx) < count; 243 if (!s->errors) { 244 mem_copy(s->data + s->widx, data, count); 245 s->widx += count; 246 } 247 } 248 249 static void 250 stream_append_byte(Stream *s, u8 b) 251 { 252 stream_append(s, &b, 1); 253 } 254 255 static void 256 stream_pad(Stream *s, u8 b, i32 n) 257 { 258 while (n > 0) stream_append_byte(s, b), n--; 259 } 260 261 static void 262 stream_append_s8(Stream *s, s8 str) 263 { 264 stream_append(s, str.data, str.len); 265 } 266 267 #define stream_append_s8s(s, ...) stream_append_s8s_(s, (s8 []){__VA_ARGS__}, \ 268 sizeof((s8 []){__VA_ARGS__}) / sizeof(s8)) 269 function void 270 stream_append_s8s_(Stream *s, s8 *strs, iz count) 271 { 272 for (iz i = 0; i < count; i++) 273 stream_append(s, strs[i].data, strs[i].len); 274 } 275 276 static void 277 stream_append_u64(Stream *s, u64 n) 278 { 279 u8 tmp[64]; 280 u8 *end = tmp + sizeof(tmp); 281 u8 *beg = end; 282 do { *--beg = '0' + (n % 10); } while (n /= 10); 283 stream_append(s, beg, end - beg); 284 } 285 286 static void 287 stream_append_hex_u64(Stream *s, u64 n) 288 { 289 if (!s->errors) { 290 static u8 hex[16] = {"0123456789abcdef"}; 291 u8 buf[16]; 292 u8 *end = buf + sizeof(buf); 293 u8 *beg = end; 294 while (n) { 295 *--beg = hex[n & 0x0F]; 296 n >>= 4; 297 } 298 while (end - beg < 2) 299 *--beg = '0'; 300 stream_append(s, beg, end - beg); 301 } 302 } 303 304 static void 305 stream_append_i64(Stream *s, i64 n) 306 { 307 if (n < 0) { 308 stream_append_byte(s, '-'); 309 n *= -1; 310 } 311 stream_append_u64(s, n); 312 } 313 314 static void 315 stream_append_f64(Stream *s, f64 f, i64 prec) 316 { 317 if (f < 0) { 318 stream_append_byte(s, '-'); 319 f *= -1; 320 } 321 322 /* NOTE: round last digit */ 323 f += 0.5f / prec; 324 325 if (f >= (f64)(-1UL >> 1)) { 326 stream_append_s8(s, s8("inf")); 327 } else { 328 u64 integral = f; 329 u64 fraction = (f - integral) * prec; 330 stream_append_u64(s, integral); 331 stream_append_byte(s, '.'); 332 for (i64 i = prec / 10; i > 1; i /= 10) { 333 if (i > fraction) 334 stream_append_byte(s, '0'); 335 } 336 stream_append_u64(s, fraction); 337 } 338 } 339 340 static void 341 stream_append_f64_e(Stream *s, f64 f) 342 { 343 /* TODO: there should be a better way of doing this */ 344 #if 0 345 /* NOTE: we ignore subnormal numbers for now */ 346 union { f64 f; u64 u; } u = {.f = f}; 347 i32 exponent = ((u.u >> 52) & 0x7ff) - 1023; 348 f32 log_10_of_2 = 0.301f; 349 i32 scale = (exponent * log_10_of_2); 350 /* NOTE: normalize f */ 351 for (i32 i = ABS(scale); i > 0; i--) 352 f *= (scale > 0)? 0.1f : 10.0f; 353 #else 354 i32 scale = 0; 355 if (f != 0) { 356 while (f > 1) { 357 f *= 0.1f; 358 scale++; 359 } 360 while (f < 1) { 361 f *= 10.0f; 362 scale--; 363 } 364 } 365 #endif 366 367 i32 prec = 100; 368 stream_append_f64(s, f, prec); 369 stream_append_byte(s, 'e'); 370 stream_append_byte(s, scale >= 0? '+' : '-'); 371 for (i32 i = prec / 10; i > 1; i /= 10) 372 stream_append_byte(s, '0'); 373 stream_append_u64(s, ABS(scale)); 374 } 375 376 static void 377 stream_append_v2(Stream *s, v2 v) 378 { 379 stream_append_byte(s, '{'); 380 stream_append_f64(s, v.x, 100); 381 stream_append_s8(s, s8(", ")); 382 stream_append_f64(s, v.y, 100); 383 stream_append_byte(s, '}'); 384 } 385 386 function Stream 387 arena_stream(Arena a) 388 { 389 Stream result = {0}; 390 result.data = a.beg; 391 result.cap = a.end - a.beg; 392 return result; 393 } 394 395 function s8 396 arena_stream_commit(Arena *a, Stream *s) 397 { 398 ASSERT(s->data == a->beg); 399 s8 result = stream_to_s8(s); 400 arena_commit(a, result.len); 401 return result; 402 } 403 404 function s8 405 arena_stream_commit_zero(Arena *a, Stream *s) 406 { 407 b32 error = s->errors || s->widx == s->cap; 408 if (!error) 409 s->data[s->widx] = 0; 410 s8 result = stream_to_s8(s); 411 arena_commit(a, result.len + 1); 412 return result; 413 } 414 415 /* NOTE(rnp): FNV-1a hash */ 416 function u64 417 s8_hash(s8 v) 418 { 419 u64 h = 0x3243f6a8885a308d; /* digits of pi */ 420 for (; v.len; v.len--) { 421 h ^= v.data[v.len - 1] & 0xFF; 422 h *= 1111111111111111111; /* random prime */ 423 } 424 return h; 425 } 426 427 static s8 428 c_str_to_s8(char *cstr) 429 { 430 s8 result = {.data = (u8 *)cstr}; 431 if (cstr) { while (*cstr) { result.len++; cstr++; } } 432 return result; 433 } 434 435 /* NOTE(rnp): returns < 0 if byte is not found */ 436 static iz 437 s8_scan_backwards(s8 s, u8 byte) 438 { 439 iz result = s.len; 440 while (result && s.data[result - 1] != byte) result--; 441 result--; 442 return result; 443 } 444 445 static s8 446 s8_cut_head(s8 s, iz cut) 447 { 448 s8 result = s; 449 if (cut > 0) { 450 result.data += cut; 451 result.len -= cut; 452 } 453 return result; 454 } 455 456 function s8 457 s8_alloc(Arena *a, iz len) 458 { 459 s8 result = {.data = push_array(a, u8, len), .len = len}; 460 return result; 461 } 462 463 function s8 464 s16_to_s8(Arena *a, s16 in) 465 { 466 s8 result = s8(""); 467 if (in.len) { 468 iz commit = in.len * 4; 469 iz length = 0; 470 u8 *data = arena_commit(a, commit + 1); 471 u16 *beg = in.data; 472 u16 *end = in.data + in.len; 473 while (beg < end) { 474 UnicodeDecode decode = utf16_decode(beg, end - beg); 475 length += utf8_encode(data + length, decode.cp); 476 beg += decode.consumed; 477 } 478 data[length] = 0; 479 result = (s8){.len = length, .data = data}; 480 arena_pop(a, commit - length); 481 } 482 return result; 483 } 484 485 static s16 486 s8_to_s16(Arena *a, s8 in) 487 { 488 s16 result = {0}; 489 if (in.len) { 490 iz required = 2 * in.len + 1; 491 u16 *data = push_array(a, u16, required); 492 iz length = 0; 493 /* TODO(rnp): utf8_decode */ 494 for (iz i = 0; i < in.len; i++) { 495 u32 cp = in.data[i]; 496 length += utf16_encode(data + length, cp); 497 } 498 result = (s16){.len = length, .data = data}; 499 arena_pop(a, required - length); 500 } 501 return result; 502 } 503 504 static s8 505 push_s8(Arena *a, s8 str) 506 { 507 s8 result = s8_alloc(a, str.len); 508 mem_copy(result.data, str.data, result.len); 509 return result; 510 } 511 512 static s8 513 push_s8_zero(Arena *a, s8 str) 514 { 515 s8 result = s8_alloc(a, str.len + 1); 516 result.len -= 1; 517 mem_copy(result.data, str.data, result.len); 518 return result; 519 } 520 521 static u32 522 round_down_power_of_2(u32 a) 523 { 524 u32 result = 0x80000000UL >> clz_u32(a); 525 return result; 526 } 527 528 static b32 529 uv2_equal(uv2 a, uv2 b) 530 { 531 return a.x == b.x && a.y == b.y; 532 } 533 534 static b32 535 uv3_equal(uv3 a, uv3 b) 536 { 537 return a.x == b.x && a.y == b.y && a.z == b.z; 538 } 539 540 static v3 541 cross(v3 a, v3 b) 542 { 543 v3 result = { 544 .x = a.y * b.z - a.z * b.y, 545 .y = a.z * b.x - a.x * b.z, 546 .z = a.x * b.y - a.y * b.x, 547 }; 548 return result; 549 } 550 551 static v3 552 sub_v3(v3 a, v3 b) 553 { 554 v3 result = { 555 .x = a.x - b.x, 556 .y = a.y - b.y, 557 .z = a.z - b.z, 558 }; 559 return result; 560 } 561 562 static f32 563 length_v3(v3 a) 564 { 565 f32 result = a.x * a.x + a.y * a.y + a.z * a.z; 566 return result; 567 } 568 569 static v3 570 normalize_v3(v3 a) 571 { 572 f32 length = length_v3(a); 573 v3 result = {.x = a.x / length, .y = a.y / length, .z = a.z / length}; 574 return result; 575 } 576 577 static v2 578 clamp_v2_rect(v2 v, Rect r) 579 { 580 v2 result = v; 581 result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x); 582 result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y); 583 return result; 584 } 585 586 static v2 587 add_v2(v2 a, v2 b) 588 { 589 v2 result = { 590 .x = a.x + b.x, 591 .y = a.y + b.y, 592 }; 593 return result; 594 } 595 596 static v2 597 sub_v2(v2 a, v2 b) 598 { 599 v2 result = { 600 .x = a.x - b.x, 601 .y = a.y - b.y, 602 }; 603 return result; 604 } 605 606 static v2 607 scale_v2(v2 a, f32 scale) 608 { 609 v2 result = { 610 .x = a.x * scale, 611 .y = a.y * scale, 612 }; 613 return result; 614 } 615 616 static v2 617 mul_v2(v2 a, v2 b) 618 { 619 v2 result = { 620 .x = a.x * b.x, 621 .y = a.y * b.y, 622 }; 623 return result; 624 } 625 626 function v2 627 div_v2(v2 a, v2 b) 628 { 629 v2 result; 630 result.x = a.x / b.x; 631 result.y = a.y / b.y; 632 return result; 633 } 634 635 636 static v2 637 floor_v2(v2 a) 638 { 639 v2 result; 640 result.x = (u32)a.x; 641 result.y = (u32)a.y; 642 return result; 643 } 644 645 static f32 646 magnitude_v2(v2 a) 647 { 648 f32 result = sqrt_f32(a.x * a.x + a.y * a.y); 649 return result; 650 } 651 652 function uv4 653 uv4_from_u32_array(u32 v[4]) 654 { 655 uv4 result; 656 result.E[0] = v[0]; 657 result.E[1] = v[1]; 658 result.E[2] = v[2]; 659 result.E[3] = v[3]; 660 return result; 661 } 662 663 function b32 664 uv4_equal(uv4 a, uv4 b) 665 { 666 return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; 667 } 668 669 function v4 670 v4_from_f32_array(f32 v[4]) 671 { 672 v4 result; 673 result.E[0] = v[0]; 674 result.E[1] = v[1]; 675 result.E[2] = v[2]; 676 result.E[3] = v[3]; 677 return result; 678 } 679 680 function v4 681 sub_v4(v4 a, v4 b) 682 { 683 v4 result; 684 result.x = a.x - b.x; 685 result.y = a.y - b.y; 686 result.z = a.z - b.z; 687 result.w = a.w - b.w; 688 return result; 689 } 690 691 static void 692 split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right) 693 { 694 if (left) { 695 left->pos = rect.pos; 696 left->size.h = rect.size.h; 697 left->size.w = rect.size.w * fraction; 698 } 699 if (right) { 700 right->pos = rect.pos; 701 right->pos.x += rect.size.w * fraction; 702 right->size.h = rect.size.h; 703 right->size.w = rect.size.w * (1.0f - fraction); 704 } 705 } 706 707 static void 708 split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot) 709 { 710 if (top) { 711 top->pos = rect.pos; 712 top->size.w = rect.size.w; 713 top->size.h = rect.size.h * fraction; 714 } 715 if (bot) { 716 bot->pos = rect.pos; 717 bot->pos.y += rect.size.h * fraction; 718 bot->size.w = rect.size.w; 719 bot->size.h = rect.size.h * (1.0f - fraction); 720 } 721 } 722 723 static void 724 cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right) 725 { 726 at = MIN(at, rect.size.w); 727 if (left) { 728 *left = rect; 729 left->size.w = at; 730 } 731 if (right) { 732 *right = rect; 733 right->pos.x += at; 734 right->size.w -= at; 735 } 736 } 737 738 static void 739 cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot) 740 { 741 at = MIN(at, rect.size.h); 742 if (top) { 743 *top = rect; 744 top->size.h = at; 745 } 746 if (bot) { 747 *bot = rect; 748 bot->pos.y += at; 749 bot->size.h -= at; 750 } 751 } 752 753 static f64 754 parse_f64(s8 s) 755 { 756 f64 integral = 0, fractional = 0, sign = 1; 757 758 if (s.len && *s.data == '-') { 759 sign = -1; 760 s.data++; 761 s.len--; 762 } 763 764 while (s.len && *s.data != '.') { 765 integral *= 10; 766 integral += *s.data - '0'; 767 s.data++; 768 s.len--; 769 } 770 771 if (*s.data == '.') { s.data++; s.len--; } 772 773 while (s.len) { 774 ASSERT(s.data[s.len - 1] != '.'); 775 fractional /= 10; 776 fractional += (f64)(s.data[--s.len] - '0') / 10.0; 777 } 778 f64 result = sign * (integral + fractional); 779 return result; 780 } 781 782 function FileWatchDirectory * 783 lookup_file_watch_directory(FileWatchContext *ctx, u64 hash) 784 { 785 FileWatchDirectory *result = 0; 786 for (u32 i = 0; i < ctx->count; i++) { 787 FileWatchDirectory *test = ctx->data + i; 788 if (test->hash == hash) { 789 result = test; 790 break; 791 } 792 } 793 return result; 794 } 795 796 function void 797 fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, uv2 b_dim) 798 { 799 f32x4 vscale = dup_f32x4(scale); 800 for (u32 i = 0; i < b_dim.y; i++) { 801 for (u32 j = 0; j < b_dim.x; j += 4, b += 4) { 802 f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b)); 803 store_i32x4(cvt_f32x4_i32x4(mul_f32x4(vscale, vb)), out + j); 804 } 805 out += out_stride; 806 } 807 } 808 809 /* NOTE: this won't check for valid space/etc and assumes row major order */ 810 static void 811 kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim) 812 { 813 uv2 out_dim = {.x = a_dim.x * b_dim.x, .y = a_dim.y * b_dim.y}; 814 ASSERT(out_dim.y % 4 == 0); 815 for (u32 i = 0; i < a_dim.y; i++) { 816 i32 *vout = out; 817 for (u32 j = 0; j < a_dim.x; j++, a++) { 818 fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim); 819 vout += b_dim.y; 820 } 821 out += out_dim.y * b_dim.x; 822 } 823 } 824 825 /* NOTE/TODO: to support even more hadamard sizes use the Paley construction */ 826 function i32 * 827 make_hadamard_transpose(Arena *a, u32 dim) 828 { 829 i32 *result = 0; 830 831 b32 power_of_2 = ISPOWEROF2(dim); 832 b32 multiple_of_12 = dim % 12 == 0; 833 iz elements = dim * dim; 834 835 if (dim && (power_of_2 || multiple_of_12) && 836 arena_capacity(a, i32) >= elements * (1 + !power_of_2)) 837 { 838 if (!power_of_2) dim /= 12; 839 result = push_array(a, i32, elements); 840 841 Arena tmp = *a; 842 i32 *m = power_of_2 ? result : push_array(&tmp, i32, elements); 843 844 #define IND(i, j) ((i) * dim + (j)) 845 m[0] = 1; 846 for (u32 k = 1; k < dim; k *= 2) { 847 for (u32 i = 0; i < k; i++) { 848 for (u32 j = 0; j < k; j++) { 849 i32 val = m[IND(i, j)]; 850 m[IND(i + k, j)] = val; 851 m[IND(i, j + k)] = val; 852 m[IND(i + k, j + k)] = -val; 853 } 854 } 855 } 856 #undef IND 857 858 if (!power_of_2) { 859 kronecker_product(result, m, (uv2){.x = dim, .y = dim}, 860 hadamard_12_12_transpose, (uv2){.x = 12, .y = 12}); 861 } 862 } 863 864 return result; 865 }