math.c (17870B)
1 #include "external/cephes.c" 2 3 function void 4 fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, iv2 b_dim) 5 { 6 f32x4 vscale = dup_f32x4((f32)scale); 7 for (i32 i = 0; i < b_dim.y; i++) { 8 for (i32 j = 0; j < b_dim.x; j += 4, b += 4) { 9 f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b)); 10 store_i32x4(out + j, cvt_f32x4_i32x4(mul_f32x4(vscale, vb))); 11 } 12 out += out_stride; 13 } 14 } 15 16 /* NOTE: this won't check for valid space/etc and assumes row major order */ 17 function void 18 kronecker_product(i32 *out, i32 *a, iv2 a_dim, i32 *b, iv2 b_dim) 19 { 20 iv2 out_dim = {{a_dim.x * b_dim.x, a_dim.y * b_dim.y}}; 21 assert(out_dim.y % 4 == 0); 22 for (i32 i = 0; i < a_dim.y; i++) { 23 i32 *vout = out; 24 for (i32 j = 0; j < a_dim.x; j++, a++) { 25 fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim); 26 vout += b_dim.y; 27 } 28 out += out_dim.y * b_dim.x; 29 } 30 } 31 32 /* NOTE/TODO: to support even more hadamard sizes use the Paley construction */ 33 function i32 * 34 make_hadamard_transpose(Arena *a, i32 dim) 35 { 36 read_only local_persist i32 hadamard_12_12_transpose[] = { 37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, 39 1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 40 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, 41 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 42 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 43 1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 44 1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, 45 1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, 46 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, 47 1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 48 1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, 49 }; 50 51 read_only local_persist i32 hadamard_20_20_transpose[] = { 52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 53 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, 54 1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, 55 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 56 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 57 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, 58 1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, 59 1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, 60 1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, 61 1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 62 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, 63 1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 64 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 65 1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 66 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 67 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 68 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 69 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, 70 1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, 71 1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 72 }; 73 74 i32 *result = 0; 75 76 b32 power_of_2 = ISPOWEROF2(dim); 77 b32 multiple_of_12 = dim % 12 == 0; 78 b32 multiple_of_20 = dim % 20 == 0; 79 iz elements = dim * dim; 80 81 i32 base_dim = 0; 82 if (power_of_2) { 83 base_dim = dim; 84 } else if (multiple_of_20 && ISPOWEROF2(dim / 20)) { 85 base_dim = 20; 86 dim /= 20; 87 } else if (multiple_of_12 && ISPOWEROF2(dim / 12)) { 88 base_dim = 12; 89 dim /= 12; 90 } 91 92 if (ISPOWEROF2(dim) && base_dim && arena_capacity(a, i32) >= elements * (1 + (dim != base_dim))) { 93 result = push_array(a, i32, elements); 94 95 Arena tmp = *a; 96 i32 *m = dim == base_dim ? result : push_array(&tmp, i32, elements); 97 98 #define IND(i, j) ((i) * dim + (j)) 99 m[0] = 1; 100 for (i32 k = 1; k < dim; k *= 2) { 101 for (i32 i = 0; i < k; i++) { 102 for (i32 j = 0; j < k; j++) { 103 i32 val = m[IND(i, j)]; 104 m[IND(i + k, j)] = val; 105 m[IND(i, j + k)] = val; 106 m[IND(i + k, j + k)] = -val; 107 } 108 } 109 } 110 #undef IND 111 112 i32 *m2 = 0; 113 iv2 m2_dim; 114 switch (base_dim) { 115 case 12:{ m2 = hadamard_12_12_transpose; m2_dim = (iv2){{12, 12}}; }break; 116 case 20:{ m2 = hadamard_20_20_transpose; m2_dim = (iv2){{20, 20}}; }break; 117 } 118 if (m2) kronecker_product(result, m, (iv2){{dim, dim}}, m2, m2_dim); 119 } 120 121 return result; 122 } 123 124 /* NOTE(rnp): adapted from "Discrete Time Signal Processing" (Oppenheim) */ 125 function f32 * 126 kaiser_low_pass_filter(Arena *arena, f32 cutoff_frequency, f32 sampling_frequency, f32 beta, i32 length) 127 { 128 f32 *result = push_array(arena, f32, length); 129 f32 wc = 2 * PI * cutoff_frequency / sampling_frequency; 130 f32 a = (f32)length / 2.0f; 131 f32 pi_i0_b = PI * (f32)cephes_i0(beta); 132 133 for (i32 n = 0; n < length; n++) { 134 f32 t = (f32)n - a; 135 f32 impulse = !f32_cmp(t, 0) ? sin_f32(wc * t) / t : wc; 136 t = t / a; 137 f32 window = (f32)cephes_i0(beta * sqrt_f32(1 - t * t)) / pi_i0_b; 138 result[n] = impulse * window; 139 } 140 141 return result; 142 } 143 144 function b32 145 iv2_equal(iv2 a, iv2 b) 146 { 147 b32 result = a.x == b.x && a.y == b.y; 148 return result; 149 } 150 151 function b32 152 iv3_equal(iv3 a, iv3 b) 153 { 154 b32 result = a.x == b.x && a.y == b.y && a.z == b.z; 155 return result; 156 } 157 158 function v2 159 clamp_v2_rect(v2 v, Rect r) 160 { 161 v2 result = v; 162 result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x); 163 result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y); 164 return result; 165 } 166 167 function v2 168 v2_scale(v2 a, f32 scale) 169 { 170 v2 result; 171 result.x = a.x * scale; 172 result.y = a.y * scale; 173 return result; 174 } 175 176 function v2 177 v2_add(v2 a, v2 b) 178 { 179 v2 result; 180 result.x = a.x + b.x; 181 result.y = a.y + b.y; 182 return result; 183 } 184 185 function v2 186 v2_sub(v2 a, v2 b) 187 { 188 v2 result = v2_add(a, v2_scale(b, -1.0f)); 189 return result; 190 } 191 192 function v2 193 v2_mul(v2 a, v2 b) 194 { 195 v2 result; 196 result.x = a.x * b.x; 197 result.y = a.y * b.y; 198 return result; 199 } 200 201 function v2 202 v2_div(v2 a, v2 b) 203 { 204 v2 result; 205 result.x = a.x / b.x; 206 result.y = a.y / b.y; 207 return result; 208 } 209 210 function v2 211 v2_floor(v2 a) 212 { 213 v2 result; 214 result.x = (f32)((i32)a.x); 215 result.y = (f32)((i32)a.y); 216 return result; 217 } 218 219 function f32 220 v2_magnitude(v2 a) 221 { 222 f32 result = sqrt_f32(a.x * a.x + a.y * a.y); 223 return result; 224 } 225 226 function v3 227 cross(v3 a, v3 b) 228 { 229 v3 result; 230 result.x = a.y * b.z - a.z * b.y; 231 result.y = a.z * b.x - a.x * b.z; 232 result.z = a.x * b.y - a.y * b.x; 233 return result; 234 } 235 236 function v3 237 v3_abs(v3 a) 238 { 239 v3 result; 240 result.x = ABS(a.x); 241 result.y = ABS(a.y); 242 result.z = ABS(a.z); 243 return result; 244 } 245 246 function v3 247 v3_scale(v3 a, f32 scale) 248 { 249 v3 result; 250 result.x = scale * a.x; 251 result.y = scale * a.y; 252 result.z = scale * a.z; 253 return result; 254 } 255 256 function v3 257 v3_add(v3 a, v3 b) 258 { 259 v3 result; 260 result.x = a.x + b.x; 261 result.y = a.y + b.y; 262 result.z = a.z + b.z; 263 return result; 264 } 265 266 function v3 267 v3_sub(v3 a, v3 b) 268 { 269 v3 result = v3_add(a, v3_scale(b, -1.0f)); 270 return result; 271 } 272 273 function v3 274 v3_div(v3 a, v3 b) 275 { 276 v3 result; 277 result.x = a.x / b.x; 278 result.y = a.y / b.y; 279 result.z = a.z / b.z; 280 return result; 281 } 282 283 function f32 284 v3_dot(v3 a, v3 b) 285 { 286 f32 result = a.x * b.x + a.y * b.y + a.z * b.z; 287 return result; 288 } 289 290 function f32 291 v3_magnitude_squared(v3 a) 292 { 293 f32 result = v3_dot(a, a); 294 return result; 295 } 296 297 function f32 298 v3_magnitude(v3 a) 299 { 300 f32 result = sqrt_f32(v3_dot(a, a)); 301 return result; 302 } 303 304 function v3 305 v3_normalize(v3 a) 306 { 307 v3 result = v3_scale(a, 1.0f / v3_magnitude(a)); 308 return result; 309 } 310 311 function uv4 312 uv4_from_u32_array(u32 v[4]) 313 { 314 uv4 result; 315 result.E[0] = v[0]; 316 result.E[1] = v[1]; 317 result.E[2] = v[2]; 318 result.E[3] = v[3]; 319 return result; 320 } 321 322 function b32 323 uv4_equal(uv4 a, uv4 b) 324 { 325 return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; 326 } 327 328 function v4 329 v4_from_f32_array(f32 v[4]) 330 { 331 v4 result; 332 result.E[0] = v[0]; 333 result.E[1] = v[1]; 334 result.E[2] = v[2]; 335 result.E[3] = v[3]; 336 return result; 337 } 338 339 function v4 340 v4_scale(v4 a, f32 scale) 341 { 342 v4 result; 343 result.x = scale * a.x; 344 result.y = scale * a.y; 345 result.z = scale * a.z; 346 result.w = scale * a.w; 347 return result; 348 } 349 350 function v4 351 v4_add(v4 a, v4 b) 352 { 353 v4 result; 354 result.x = a.x + b.x; 355 result.y = a.y + b.y; 356 result.z = a.z + b.z; 357 result.w = a.w + b.w; 358 return result; 359 } 360 361 function v4 362 v4_sub(v4 a, v4 b) 363 { 364 v4 result = v4_add(a, v4_scale(b, -1)); 365 return result; 366 } 367 368 function f32 369 v4_dot(v4 a, v4 b) 370 { 371 f32 result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 372 return result; 373 } 374 375 function v4 376 v4_lerp(v4 a, v4 b, f32 t) 377 { 378 v4 result = v4_add(a, v4_scale(v4_sub(b, a), t)); 379 return result; 380 } 381 382 function m4 383 m4_identity(void) 384 { 385 m4 result; 386 result.c[0] = (v4){{1, 0, 0, 0}}; 387 result.c[1] = (v4){{0, 1, 0, 0}}; 388 result.c[2] = (v4){{0, 0, 1, 0}}; 389 result.c[3] = (v4){{0, 0, 0, 1}}; 390 return result; 391 } 392 393 function v4 394 m4_row(m4 a, u32 row) 395 { 396 v4 result; 397 result.E[0] = a.c[0].E[row]; 398 result.E[1] = a.c[1].E[row]; 399 result.E[2] = a.c[2].E[row]; 400 result.E[3] = a.c[3].E[row]; 401 return result; 402 } 403 404 function m4 405 m4_mul(m4 a, m4 b) 406 { 407 m4 result; 408 for (u32 i = 0; i < 4; i++) { 409 for (u32 j = 0; j < 4; j++) { 410 result.c[i].E[j] = v4_dot(m4_row(a, j), b.c[i]); 411 } 412 } 413 return result; 414 } 415 416 /* NOTE(rnp): based on: 417 * https://web.archive.org/web/20131215123403/ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf 418 * TODO(rnp): redo with SIMD as given in the link (but need to rewrite for column-major) 419 */ 420 function m4 421 m4_inverse(m4 m) 422 { 423 m4 result; 424 result.E[ 0] = m.E[5] * m.E[10] * m.E[15] - m.E[5] * m.E[11] * m.E[14] - m.E[9] * m.E[6] * m.E[15] + m.E[9] * m.E[7] * m.E[14] + m.E[13] * m.E[6] * m.E[11] - m.E[13] * m.E[7] * m.E[10]; 425 result.E[ 4] = -m.E[4] * m.E[10] * m.E[15] + m.E[4] * m.E[11] * m.E[14] + m.E[8] * m.E[6] * m.E[15] - m.E[8] * m.E[7] * m.E[14] - m.E[12] * m.E[6] * m.E[11] + m.E[12] * m.E[7] * m.E[10]; 426 result.E[ 8] = m.E[4] * m.E[ 9] * m.E[15] - m.E[4] * m.E[11] * m.E[13] - m.E[8] * m.E[5] * m.E[15] + m.E[8] * m.E[7] * m.E[13] + m.E[12] * m.E[5] * m.E[11] - m.E[12] * m.E[7] * m.E[ 9]; 427 result.E[12] = -m.E[4] * m.E[ 9] * m.E[14] + m.E[4] * m.E[10] * m.E[13] + m.E[8] * m.E[5] * m.E[14] - m.E[8] * m.E[6] * m.E[13] - m.E[12] * m.E[5] * m.E[10] + m.E[12] * m.E[6] * m.E[ 9]; 428 result.E[ 1] = -m.E[1] * m.E[10] * m.E[15] + m.E[1] * m.E[11] * m.E[14] + m.E[9] * m.E[2] * m.E[15] - m.E[9] * m.E[3] * m.E[14] - m.E[13] * m.E[2] * m.E[11] + m.E[13] * m.E[3] * m.E[10]; 429 result.E[ 5] = m.E[0] * m.E[10] * m.E[15] - m.E[0] * m.E[11] * m.E[14] - m.E[8] * m.E[2] * m.E[15] + m.E[8] * m.E[3] * m.E[14] + m.E[12] * m.E[2] * m.E[11] - m.E[12] * m.E[3] * m.E[10]; 430 result.E[ 9] = -m.E[0] * m.E[ 9] * m.E[15] + m.E[0] * m.E[11] * m.E[13] + m.E[8] * m.E[1] * m.E[15] - m.E[8] * m.E[3] * m.E[13] - m.E[12] * m.E[1] * m.E[11] + m.E[12] * m.E[3] * m.E[ 9]; 431 result.E[13] = m.E[0] * m.E[ 9] * m.E[14] - m.E[0] * m.E[10] * m.E[13] - m.E[8] * m.E[1] * m.E[14] + m.E[8] * m.E[2] * m.E[13] + m.E[12] * m.E[1] * m.E[10] - m.E[12] * m.E[2] * m.E[ 9]; 432 result.E[ 2] = m.E[1] * m.E[ 6] * m.E[15] - m.E[1] * m.E[ 7] * m.E[14] - m.E[5] * m.E[2] * m.E[15] + m.E[5] * m.E[3] * m.E[14] + m.E[13] * m.E[2] * m.E[ 7] - m.E[13] * m.E[3] * m.E[ 6]; 433 result.E[ 6] = -m.E[0] * m.E[ 6] * m.E[15] + m.E[0] * m.E[ 7] * m.E[14] + m.E[4] * m.E[2] * m.E[15] - m.E[4] * m.E[3] * m.E[14] - m.E[12] * m.E[2] * m.E[ 7] + m.E[12] * m.E[3] * m.E[ 6]; 434 result.E[10] = m.E[0] * m.E[ 5] * m.E[15] - m.E[0] * m.E[ 7] * m.E[13] - m.E[4] * m.E[1] * m.E[15] + m.E[4] * m.E[3] * m.E[13] + m.E[12] * m.E[1] * m.E[ 7] - m.E[12] * m.E[3] * m.E[ 5]; 435 result.E[14] = -m.E[0] * m.E[ 5] * m.E[14] + m.E[0] * m.E[ 6] * m.E[13] + m.E[4] * m.E[1] * m.E[14] - m.E[4] * m.E[2] * m.E[13] - m.E[12] * m.E[1] * m.E[ 6] + m.E[12] * m.E[2] * m.E[ 5]; 436 result.E[ 3] = -m.E[1] * m.E[ 6] * m.E[11] + m.E[1] * m.E[ 7] * m.E[10] + m.E[5] * m.E[2] * m.E[11] - m.E[5] * m.E[3] * m.E[10] - m.E[ 9] * m.E[2] * m.E[ 7] + m.E[ 9] * m.E[3] * m.E[ 6]; 437 result.E[ 7] = m.E[0] * m.E[ 6] * m.E[11] - m.E[0] * m.E[ 7] * m.E[10] - m.E[4] * m.E[2] * m.E[11] + m.E[4] * m.E[3] * m.E[10] + m.E[ 8] * m.E[2] * m.E[ 7] - m.E[ 8] * m.E[3] * m.E[ 6]; 438 result.E[11] = -m.E[0] * m.E[ 5] * m.E[11] + m.E[0] * m.E[ 7] * m.E[ 9] + m.E[4] * m.E[1] * m.E[11] - m.E[4] * m.E[3] * m.E[ 9] - m.E[ 8] * m.E[1] * m.E[ 7] + m.E[ 8] * m.E[3] * m.E[ 5]; 439 result.E[15] = m.E[0] * m.E[ 5] * m.E[10] - m.E[0] * m.E[ 6] * m.E[ 9] - m.E[4] * m.E[1] * m.E[10] + m.E[4] * m.E[2] * m.E[ 9] + m.E[ 8] * m.E[1] * m.E[ 6] - m.E[ 8] * m.E[2] * m.E[ 5]; 440 441 f32 determinant = m.E[0] * result.E[0] + m.E[1] * result.E[4] + m.E[2] * result.E[8] + m.E[3] * result.E[12]; 442 determinant = 1.0f / determinant; 443 for(i32 i = 0; i < 16; i++) 444 result.E[i] *= determinant; 445 return result; 446 } 447 448 function m4 449 m4_translation(v3 delta) 450 { 451 m4 result; 452 result.c[0] = (v4){{1, 0, 0, 0}}; 453 result.c[1] = (v4){{0, 1, 0, 0}}; 454 result.c[2] = (v4){{0, 0, 1, 0}}; 455 result.c[3] = (v4){{delta.x, delta.y, delta.z, 1}}; 456 return result; 457 } 458 459 function m4 460 m4_scale(v3 scale) 461 { 462 m4 result; 463 result.c[0] = (v4){{scale.x, 0, 0, 0}}; 464 result.c[1] = (v4){{0, scale.y, 0, 0}}; 465 result.c[2] = (v4){{0, 0, scale.z, 0}}; 466 result.c[3] = (v4){{0, 0, 0, 1}}; 467 return result; 468 } 469 470 function m4 471 m4_rotation_about_z(f32 turns) 472 { 473 f32 sa = sin_f32(turns * 2 * PI); 474 f32 ca = cos_f32(turns * 2 * PI); 475 m4 result; 476 result.c[0] = (v4){{ca, -sa, 0, 0}}; 477 result.c[1] = (v4){{sa, ca, 0, 0}}; 478 result.c[2] = (v4){{0, 0, 1, 0}}; 479 result.c[3] = (v4){{0, 0, 0, 1}}; 480 return result; 481 } 482 483 function m4 484 m4_rotation_about_y(f32 turns) 485 { 486 f32 sa = sin_f32(turns * 2 * PI); 487 f32 ca = cos_f32(turns * 2 * PI); 488 m4 result; 489 result.c[0] = (v4){{ca, 0, -sa, 0}}; 490 result.c[1] = (v4){{0, 1, 0, 0}}; 491 result.c[2] = (v4){{sa, 0, ca, 0}}; 492 result.c[3] = (v4){{0, 0, 0, 1}}; 493 return result; 494 } 495 496 function m4 497 y_aligned_volume_transform(v3 extent, v3 translation, f32 rotation_turns) 498 { 499 m4 T = m4_translation(translation); 500 m4 R = m4_rotation_about_y(rotation_turns); 501 m4 S = m4_scale(extent); 502 m4 result = m4_mul(T, m4_mul(R, S)); 503 return result; 504 } 505 506 function v4 507 m4_mul_v4(m4 a, v4 v) 508 { 509 v4 result; 510 result.x = v4_dot(m4_row(a, 0), v); 511 result.y = v4_dot(m4_row(a, 1), v); 512 result.z = v4_dot(m4_row(a, 2), v); 513 result.w = v4_dot(m4_row(a, 3), v); 514 return result; 515 } 516 517 function m4 518 orthographic_projection(f32 n, f32 f, f32 t, f32 r) 519 { 520 m4 result; 521 f32 a = -2 / (f - n); 522 f32 b = - (f + n) / (f - n); 523 result.c[0] = (v4){{1 / r, 0, 0, 0}}; 524 result.c[1] = (v4){{0, 1 / t, 0, 0}}; 525 result.c[2] = (v4){{0, 0, a, 0}}; 526 result.c[3] = (v4){{0, 0, b, 1}}; 527 return result; 528 } 529 530 function m4 531 perspective_projection(f32 n, f32 f, f32 fov, f32 aspect) 532 { 533 m4 result; 534 f32 t = tan_f32(fov / 2.0f); 535 f32 r = t * aspect; 536 f32 a = -(f + n) / (f - n); 537 f32 b = -2 * f * n / (f - n); 538 result.c[0] = (v4){{1 / r, 0, 0, 0}}; 539 result.c[1] = (v4){{0, 1 / t, 0, 0}}; 540 result.c[2] = (v4){{0, 0, a, -1}}; 541 result.c[3] = (v4){{0, 0, b, 0}}; 542 return result; 543 } 544 545 function m4 546 camera_look_at(v3 camera, v3 point) 547 { 548 v3 orthogonal = {{0, 1.0f, 0}}; 549 v3 normal = v3_normalize(v3_sub(camera, point)); 550 v3 right = cross(orthogonal, normal); 551 v3 up = cross(normal, right); 552 553 v3 translate; 554 camera = v3_sub((v3){0}, camera); 555 translate.x = v3_dot(camera, right); 556 translate.y = v3_dot(camera, up); 557 translate.z = v3_dot(camera, normal); 558 559 m4 result; 560 result.c[0] = (v4){{right.x, up.x, normal.x, 0}}; 561 result.c[1] = (v4){{right.y, up.y, normal.y, 0}}; 562 result.c[2] = (v4){{right.z, up.z, normal.z, 0}}; 563 result.c[3] = (v4){{translate.x, translate.y, translate.z, 1}}; 564 return result; 565 } 566 567 /* NOTE(rnp): adapted from "Essential Mathematics for Games and Interactive Applications" (Verth, Bishop) */ 568 function f32 569 obb_raycast(m4 obb_orientation, v3 obb_size, v3 obb_center, ray r) 570 { 571 v3 p = v3_sub(obb_center, r.origin); 572 v3 X = obb_orientation.c[0].xyz; 573 v3 Y = obb_orientation.c[1].xyz; 574 v3 Z = obb_orientation.c[2].xyz; 575 576 /* NOTE(rnp): projects direction vector onto OBB axis */ 577 v3 f; 578 f.x = v3_dot(X, r.direction); 579 f.y = v3_dot(Y, r.direction); 580 f.z = v3_dot(Z, r.direction); 581 582 /* NOTE(rnp): projects relative vector onto OBB axis */ 583 v3 e; 584 e.x = v3_dot(X, p); 585 e.y = v3_dot(Y, p); 586 e.z = v3_dot(Z, p); 587 588 f32 result = 0; 589 f32 t[6] = {0}; 590 for (i32 i = 0; i < 3; i++) { 591 if (f32_cmp(f.E[i], 0)) { 592 if (-e.E[i] - obb_size.E[i] > 0 || -e.E[i] + obb_size.E[i] < 0) 593 result = -1.0f; 594 f.E[i] = F32_EPSILON; 595 } 596 t[i * 2 + 0] = (e.E[i] + obb_size.E[i]) / f.E[i]; 597 t[i * 2 + 1] = (e.E[i] - obb_size.E[i]) / f.E[i]; 598 } 599 600 if (result != -1) { 601 f32 tmin = MAX(MAX(MIN(t[0], t[1]), MIN(t[2], t[3])), MIN(t[4], t[5])); 602 f32 tmax = MIN(MIN(MAX(t[0], t[1]), MAX(t[2], t[3])), MAX(t[4], t[5])); 603 if (tmax >= 0 && tmin <= tmax) { 604 result = tmin > 0 ? tmin : tmax; 605 } else { 606 result = -1; 607 } 608 } 609 610 return result; 611 } 612 613 function v4 614 hsv_to_rgb(v4 hsv) 615 { 616 /* f(k(n)) = V - V*S*max(0, min(k, min(4 - k, 1))) 617 * k(n) = fmod((n + H * 6), 6) 618 * (R, G, B) = (f(n = 5), f(n = 3), f(n = 1)) 619 */ 620 align_as(16) f32 nval[4] = {5.0f, 3.0f, 1.0f, 0.0f}; 621 f32x4 n = load_f32x4(nval); 622 f32x4 H = dup_f32x4(hsv.x); 623 f32x4 S = dup_f32x4(hsv.y); 624 f32x4 V = dup_f32x4(hsv.z); 625 f32x4 six = dup_f32x4(6); 626 627 f32x4 t = add_f32x4(n, mul_f32x4(six, H)); 628 f32x4 rem = floor_f32x4(div_f32x4(t, six)); 629 f32x4 k = sub_f32x4(t, mul_f32x4(rem, six)); 630 631 t = min_f32x4(sub_f32x4(dup_f32x4(4), k), dup_f32x4(1)); 632 t = max_f32x4(dup_f32x4(0), min_f32x4(k, t)); 633 t = mul_f32x4(t, mul_f32x4(S, V)); 634 635 v4 rgba; 636 store_f32x4(rgba.E, sub_f32x4(V, t)); 637 rgba.a = hsv.a; 638 return rgba; 639 } 640 641 function f32 642 ease_cubic(f32 t) 643 { 644 f32 result; 645 if (t < 0.5f) { 646 result = 4.0f * t * t * t; 647 } else { 648 f32 c = -2.0f * t + 2.0f; 649 result = 1.0f - c * c * c / 2.0f; 650 } 651 return result; 652 }