util.c (10682B)
/* See LICENSE for license details. */
static i32 hadamard_12_12_transpose[] = {
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,
	1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,
	1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
	1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,
	1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,
	1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,
	1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1,
	1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1,
	1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
	1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,
	1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1,
};

#define zero_struct(s) mem_clear(s, 0, sizeof(*s))
static void *
mem_clear(void *p_, u8 c, size len)
{
	u8 *p = p_;
	while (len) p[--len] = c;
	return p;
}

static void
mem_copy(void *restrict src, void *restrict dest, size n)
{
	ASSERT(n >= 0);
	u8 *s = src, *d = dest;
	for (; n; n--) *d++ = *s++;
}

static void
mem_move(u8 *src, u8 *dest, size n)
{
	if (dest < src) mem_copy(src, dest, n);
	else            while (n) { n--; dest[n] = src[n]; }
}

static void
arena_commit(Arena *a, size len)
{
	ASSERT(a->end - a->beg >= len);
	a->beg += len;
}

#define alloc(a, t, n)    (t *)alloc_(a, sizeof(t), _Alignof(t), n)
#define push_struct(a, t) (t *)alloc_(a, sizeof(t), _Alignof(t), 1)
static void *
alloc_(Arena *a, size len, size align, size count)
{
	/* NOTE: special case 0 arena */
	if (a->beg == 0)
		return 0;

	/* NOTE: bytes needed to round a->beg up to align (a power of 2) */
	size padding   = -(uintptr_t)a->beg & (align - 1);
	size available = a->end - a->beg - padding;
	if (available < 0 || count > available / len)
		ASSERT(0 && "arena OOM\n");
	void *p  = a->beg + padding;
	a->beg  += padding + count * len;
	/* TODO: Performance? */
	return mem_clear(p, 0, count * len);
}
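/* NOTE: usage sketch (not compiled in): allocations are carved out of a
 * caller provided block; the backing buffer and names here are purely
 * illustrative - a real arena would typically come from the platform layer */
#if 0
static void
example_arena_usage(void)
{
	static u8 backing[4096];
	Arena arena   = {.beg = backing, .end = backing + sizeof(backing)};
	f32  *samples = alloc(&arena, f32, 64); /* zeroed and aligned for f32 */
	Arena *child  = push_struct(&arena, Arena);
	(void)samples; (void)child;
}
#endif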
static Arena
sub_arena(Arena *a, size len, size align)
{
	Arena result = {0};

	size padding = -(uintptr_t)a->beg & (align - 1);
	result.beg   = a->beg + padding;
	result.end   = result.beg + len;
	arena_commit(a, len + padding);

	return result;
}

static TempArena
begin_temp_arena(Arena *a)
{
	TempArena result = {.arena = a, .old_beg = a->beg};
	return result;
}

static void
end_temp_arena(TempArena ta)
{
	Arena *a = ta.arena;
	if (a) {
		ASSERT(a->beg >= ta.old_beg);
		a->beg = ta.old_beg;
	}
}

static Stream
arena_stream(Arena *a)
{
	Stream result = {0};
	result.data   = a->beg;
	result.cap    = a->end - a->beg;
	/* NOTE: the stream takes over the remainder of the arena */
	a->beg = a->end;
	return result;
}

static Stream
stream_alloc(Arena *a, size cap)
{
	Stream result = {.cap = cap};
	result.data   = alloc(a, u8, cap);
	return result;
}

static s8
stream_to_s8(Stream *s)
{
	s8 result = {.len = s->widx, .data = s->data};
	return result;
}

static void
stream_append_byte(Stream *s, u8 b)
{
	s->errors |= s->widx + 1 > s->cap;
	if (!s->errors)
		s->data[s->widx++] = b;
}

static void
stream_append_s8(Stream *s, s8 str)
{
	s->errors |= (s->cap - s->widx) < str.len;
	if (!s->errors) {
		mem_copy(str.data, s->data + s->widx, str.len);
		s->widx += str.len;
	}
}

static void
stream_append_s8_array(Stream *s, s8 *strs, size count)
{
	while (count > 0) {
		stream_append_s8(s, *strs);
		strs++;
		count--;
	}
}

static void
stream_append_u64(Stream *s, u64 n)
{
	u8 tmp[64];
	u8 *end = tmp + sizeof(tmp);
	u8 *beg = end;
	do { *--beg = '0' + (n % 10); } while (n /= 10);
	stream_append_s8(s, (s8){.len = end - beg, .data = beg});
}

static void
stream_append_i64(Stream *s, i64 n)
{
	if (n < 0) {
		stream_append_byte(s, '-');
		n *= -1;
	}
	stream_append_u64(s, n);
}

static void
stream_append_f64(Stream *s, f64 f, i64 prec)
{
	if (f < 0) {
		stream_append_byte(s, '-');
		f *= -1;
	}

	/* NOTE: round last digit */
	f += 0.5f / prec;

	if (f >= (f64)(-1UL >> 1)) {
		stream_append_s8(s, s8("inf"));
	} else {
		u64 integral = f;
		u64 fraction = (f - integral) * prec;
		stream_append_u64(s, integral);
		stream_append_byte(s, '.');
		/* NOTE: pad fractional digits with leading zeros */
		for (i64 i = prec / 10; i > 1; i /= 10) {
			if (i > fraction)
				stream_append_byte(s, '0');
		}
		stream_append_u64(s, fraction);
	}
}
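/* NOTE: prec selects a power of 10 number of fractional digits, not a digit
 * count. A usage sketch (not compiled in): */
#if 0
static void
example_f64_formatting(Stream *s)
{
	stream_append_f64(s, 3.14159, 100);  /* appends "3.14" */
	stream_append_f64(s, 3.14159, 1000); /* appends "3.142" (rounded) */
	stream_append_f64(s, 0.05,    100);  /* appends "0.05" */
}
#endif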
static void
stream_append_f64_e(Stream *s, f64 f)
{
	/* NOTE: the normalization below assumes f is positive */
	if (f < 0) {
		stream_append_byte(s, '-');
		f *= -1;
	}

	/* TODO: there should be a better way of doing this */
#if 0
	/* NOTE: we ignore subnormal numbers for now */
	union { f64 f; u64 u; } u = {.f = f};
	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
	f32 log_10_of_2 = 0.301f;
	i32 scale = (exponent * log_10_of_2);
	/* NOTE: normalize f */
	for (i32 i = ABS(scale); i > 0; i--)
		f *= (scale > 0)? 0.1f : 10.0f;
#else
	i32 scale = 0;
	if (f != 0) {
		while (f > 1) {
			f *= 0.1f;
			scale++;
		}
		while (f < 1) {
			f *= 10.0f;
			scale--;
		}
	}
#endif

	i32 prec = 100;
	stream_append_f64(s, f, prec);
	stream_append_byte(s, 'e');
	stream_append_byte(s, scale >= 0? '+' : '-');
	/* NOTE: pad exponent digits with leading zeros */
	for (i32 i = prec / 10; i > 1; i /= 10) {
		if (i > ABS(scale))
			stream_append_byte(s, '0');
	}
	stream_append_u64(s, ABS(scale));
}

static void
stream_append_v2(Stream *s, v2 v)
{
	stream_append_byte(s, '{');
	stream_append_f64(s, v.x, 100);
	stream_append_s8(s, s8(", "));
	stream_append_f64(s, v.y, 100);
	stream_append_byte(s, '}');
}

static void
stream_append_variable(Stream *s, Variable *var)
{
	switch (var->type) {
	case VT_F32: {
		f32 *f32_val = var->store;
		stream_append_f64(s, *f32_val * var->display_scale, 100);
	} break;
	case VT_I32: {
		i32 *i32_val = var->store;
		stream_append_i64(s, *i32_val * var->display_scale);
	} break;
	default: INVALID_CODE_PATH;
	}
}

/* NOTE(rnp): FNV-1a hash */
static u64
s8_hash(s8 v)
{
	u64 h = 0x3243f6a8885a308d; /* digits of pi */
	for (; v.len; v.len--) {
		h ^= v.data[v.len - 1] & 0xFF;
		h *= 1111111111111111111; /* random prime */
	}
	return h;
}

static s8
cstr_to_s8(char *cstr)
{
	s8 result = {.data = (u8 *)cstr};
	while (*cstr) { result.len++; cstr++; }
	return result;
}

/* NOTE(rnp): returns < 0 if byte is not found */
static size
s8_scan_backwards(s8 s, u8 byte)
{
	size result = s.len;
	while (result && s.data[result - 1] != byte) result--;
	result--;
	return result;
}

static s8
s8_cut_head(s8 s, size cut)
{
	s8 result = s;
	if (cut > 0) {
		result.data += cut;
		result.len  -= cut;
	}
	return result;
}

static s8
s8alloc(Arena *a, size len)
{
	return (s8){ .data = alloc(a, u8, len), .len = len };
}

static s8
push_s8(Arena *a, s8 str)
{
	s8 result = s8alloc(a, str.len);
	mem_copy(str.data, result.data, result.len);
	return result;
}

static u32
round_down_power_of_2(u32 a)
{
	u32 result = 0x80000000UL >> clz_u32(a);
	return result;
}

static b32
uv2_equal(uv2 a, uv2 b)
{
	return a.x == b.x && a.y == b.y;
}

static b32
uv3_equal(uv3 a, uv3 b)
{
	return a.x == b.x && a.y == b.y && a.z == b.z;
}

static v3
cross(v3 a, v3 b)
{
	v3 result = {
		.x = a.y * b.z - a.z * b.y,
		.y = a.z * b.x - a.x * b.z,
		.z = a.x * b.y - a.y * b.x,
	};
	return result;
}

static v3
sub_v3(v3 a, v3 b)
{
	v3 result = {
		.x = a.x - b.x,
		.y = a.y - b.y,
		.z = a.z - b.z,
	};
	return result;
}

/* NOTE: returns the squared length */
static f32
length_v3(v3 a)
{
	f32 result = a.x * a.x + a.y * a.y + a.z * a.z;
	return result;
}

static v3
normalize_v3(v3 a)
{
	f32 length = sqrt_f32(length_v3(a));
	v3 result  = {.x = a.x / length, .y = a.y / length, .z = a.z / length};
	return result;
}

static v2
clamp_v2_rect(v2 v, Rect r)
{
	v2 result = v;
	result.x  = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x);
	result.y  = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y);
	return result;
}

static v2
sub_v2(v2 a, v2 b)
{
	v2 result = {
		.x = a.x - b.x,
		.y = a.y - b.y,
	};
	return result;
}

static v2
mul_v2(v2 a, v2 b)
{
	v2 result = {
		.x = a.x * b.x,
		.y = a.y * b.y,
	};
	return result;
}

static f32
magnitude_v2(v2 a)
{
	f32 result = sqrt_f32(a.x * a.x + a.y * a.y);
	return result;
}

static b32
uv4_equal(uv4 a, uv4 b)
{
	return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
}

static v4
sub_v4(v4 a, v4 b)
{
	v4 result;
	result.x = a.x - b.x;
	result.y = a.y - b.y;
	result.z = a.z - b.z;
	result.w = a.w - b.w;
	return result;
}

static f64
parse_f64(s8 s)
{
	f64 integral = 0, fractional = 0, sign = 1;

	if (s.len && *s.data == '-') {
		sign = -1;
		s.data++;
		s.len--;
	}

	while (s.len && *s.data != '.') {
		integral *= 10;
		integral += *s.data - '0';
		s.data++;
		s.len--;
	}

	if (s.len && *s.data == '.') { s.data++; s.len--; }

	/* NOTE: build the fraction right to left (Horner's scheme) */
	while (s.len) {
		ASSERT(s.data[s.len - 1] != '.');
		fractional /= 10;
		fractional += (f64)(s.data[--s.len] - '0') / 10.0;
	}
	f64 result = sign * (integral + fractional);
	return result;
}
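/* NOTE: parse_f64 only handles a plain "[-]integral[.fraction]" form; no
 * exponents, whitespace, or validation. A sketch (not compiled in): */
#if 0
static void
example_parse_f64(void)
{
	f64 a = parse_f64(s8("-12.625")); /* -> -12.625 */
	f64 b = parse_f64(s8("42"));      /* ->  42.0   */
	(void)a; (void)b;
}
#endif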
static FileWatchDirectory *
lookup_file_watch_directory(FileWatchContext *ctx, u64 hash)
{
	FileWatchDirectory *result = 0;

	for (u32 i = 0; i < ctx->directory_watch_count; i++) {
		FileWatchDirectory *test = ctx->directory_watches + i;
		if (test->hash == hash) {
			result = test;
			break;
		}
	}

	return result;
}

static void
insert_file_watch(FileWatchDirectory *dir, s8 name, iptr user_data, file_watch_callback *callback)
{
	ASSERT(dir->file_watch_count < ARRAY_COUNT(dir->file_watches));
	FileWatch *fw = dir->file_watches + dir->file_watch_count++;
	fw->hash      = s8_hash(name);
	fw->user_data = user_data;
	fw->callback  = callback;
}

/* NOTE: scales b by a single element of a; assumes b_dim.x is a multiple of 4
 * so each row can be processed 4 lanes at a time */
static void
fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, uv2 b_dim)
{
	f32x4 vscale = dup_f32x4(scale);
	for (u32 i = 0; i < b_dim.y; i++) {
		for (u32 j = 0; j < b_dim.x; j += 4, b += 4) {
			f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b));
			store_i32x4(cvt_f32x4_i32x4(mul_f32x4(vscale, vb)), out + j);
		}
		out += out_stride;
	}
}

/* NOTE: this won't check for valid space/etc and assumes row major order */
static void
kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim)
{
	uv2 out_dim = {.x = a_dim.x * b_dim.x, .y = a_dim.y * b_dim.y};
	ASSERT(out_dim.y % 4 == 0);
	for (u32 i = 0; i < a_dim.y; i++) {
		i32 *vout = out;
		for (u32 j = 0; j < a_dim.x; j++, a++) {
			fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim);
			vout += b_dim.y;
		}
		out += out_dim.y * b_dim.x;
	}
}

/* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
static void
fill_hadamard_transpose(i32 *out, i32 *tmp, u32 dim)
{
	ASSERT(dim);
	b32 power_of_2     = ISPOWEROF2(dim);
	b32 multiple_of_12 = dim % 12 == 0;

	if (!power_of_2 && !multiple_of_12)
		return;

	if (!power_of_2) {
		ASSERT(multiple_of_12);
		dim /= 12;
		/* NOTE: the remaining factor must itself be a power of 2
		 * (i.e. the original dim was 12 * 2^n) for Sylvester's
		 * construction below */
		if (!ISPOWEROF2(dim))
			return;
	}

	i32 *m;
	if (power_of_2) m = out;
	else            m = tmp;

	/* NOTE: Sylvester's construction: H_2k = [[H_k, H_k], [H_k, -H_k]] */
#define IND(i, j) ((i) * dim + (j))
	m[0] = 1;
	for (u32 k = 1; k < dim; k *= 2) {
		for (u32 i = 0; i < k; i++) {
			for (u32 j = 0; j < k; j++) {
				i32 val              = m[IND(i, j)];
				m[IND(i + k, j)]     = val;
				m[IND(i, j + k)]     = val;
				m[IND(i + k, j + k)] = -val;
			}
		}
	}
#undef IND

	if (!power_of_2)
		kronecker_product(out, tmp, (uv2){.x = dim, .y = dim},
		                  hadamard_12_12_transpose, (uv2){.x = 12, .y = 12});
}
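/* NOTE: usage sketch (not compiled in): supported sizes are 2^n and 12 * 2^n;
 * in the 12 * 2^n case tmp must hold the (dim / 12) x (dim / 12) power of two
 * part. The arena here is purely illustrative. */
#if 0
static void
example_fill_hadamard(Arena *arena)
{
	i32 *h16 = alloc(arena, i32, 16 * 16);
	fill_hadamard_transpose(h16, 0, 16);   /* tmp unused for powers of 2 */

	i32 *h24 = alloc(arena, i32, 24 * 24);
	i32 *tmp = alloc(arena, i32, 2 * 2);
	fill_hadamard_transpose(h24, tmp, 24); /* kron(H_2, H_12) */
}
#endif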