ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

math.c (24344B)


      1 #include "external/cephes.c"
      2 
      3 function void
      4 fill_kronecker_sub_matrix(f32 *out, i32 out_stride, f32 scale, f32 *b, iv2 b_dim)
      5 {
      6 	f32x4 vscale = dup_f32x4((f32)scale);
      7 	for (i32 i = 0; i < b_dim.y; i++) {
      8 		for (i32 j = 0; j < b_dim.x; j += 4, b += 4) {
      9 			f32x4 vb = load_f32x4(b);
     10 			store_f32x4(out + j, mul_f32x4(vscale, vb));
     11 		}
     12 		out += out_stride;
     13 	}
     14 }
     15 
     16 /* NOTE: this won't check for valid space/etc and assumes row major order */
     17 function void
     18 kronecker_product(f32 *out, f32 *a, iv2 a_dim, f32 *b, iv2 b_dim)
     19 {
     20 	iv2 out_dim = {{a_dim.x * b_dim.x, a_dim.y * b_dim.y}};
     21 	assert(out_dim.y % 4 == 0);
     22 	for (i32 i = 0; i < a_dim.y; i++) {
     23 		f32 *vout = out;
     24 		for (i32 j = 0; j < a_dim.x; j++, a++) {
     25 			fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim);
     26 			vout += b_dim.y;
     27 		}
     28 		out += out_dim.y * b_dim.x;
     29 	}
     30 }
     31 
     32 /* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
     33 function f32 *
     34 make_hadamard_transpose(Arena *a, i32 dim)
     35 {
     36 	read_only local_persist	f32 hadamard_12_12_transpose[] = {
     37 		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     38 		1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,
     39 		1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,
     40 		1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
     41 		1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,
     42 		1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,
     43 		1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,
     44 		1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1,
     45 		1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1,
     46 		1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
     47 		1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,
     48 		1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1,
     49 	};
     50 
     51 	read_only local_persist f32 hadamard_20_20_transpose[] = {
     52 		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     53 		1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1,
     54 		1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1,
     55 		1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,
     56 		1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,
     57 		1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1,
     58 		1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1,
     59 		1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1,
     60 		1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1,
     61 		1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,
     62 		1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1,
     63 		1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,
     64 		1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1,
     65 		1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,
     66 		1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,
     67 		1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,
     68 		1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,
     69 		1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1,
     70 		1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1,
     71 		1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,
     72 	};
     73 
     74 	f32 *result = 0;
     75 
     76 	b32 power_of_2     = ISPOWEROF2(dim);
     77 	b32 multiple_of_12 = dim % 12 == 0;
     78 	b32 multiple_of_20 = dim % 20 == 0;
     79 	iz elements        = dim * dim;
     80 
     81 	i32 base_dim = 0;
     82 	if (power_of_2) {
     83 		base_dim  = dim;
     84 	} else if (multiple_of_20 && ISPOWEROF2(dim / 20)) {
     85 		base_dim  = 20;
     86 		dim      /= 20;
     87 	} else if (multiple_of_12 && ISPOWEROF2(dim / 12)) {
     88 		base_dim  = 12;
     89 		dim      /= 12;
     90 	}
     91 
     92 	if (ISPOWEROF2(dim) && base_dim && arena_capacity(a, f32) >= elements * (1 + (dim != base_dim))) {
     93 		result = push_array(a, f32, elements);
     94 
     95 		Arena tmp = *a;
     96 		f32 *m = dim == base_dim ? result : push_array(&tmp, f32, elements);
     97 
     98 		#define IND(i, j) ((i) * dim + (j))
     99 		m[0] = 1;
    100 		for (i32 k = 1; k < dim; k *= 2) {
    101 			for (i32 i = 0; i < k; i++) {
    102 				for (i32 j = 0; j < k; j++) {
    103 					f32 val = m[IND(i, j)];
    104 					m[IND(i + k, j)]     =  val;
    105 					m[IND(i, j + k)]     =  val;
    106 					m[IND(i + k, j + k)] = -val;
    107 				}
    108 			}
    109 		}
    110 		#undef IND
    111 
    112 		f32 *m2 = 0;
    113 		iv2 m2_dim;
    114 		switch (base_dim) {
    115 		case 12:{ m2 = hadamard_12_12_transpose; m2_dim = (iv2){{12, 12}}; }break;
    116 		case 20:{ m2 = hadamard_20_20_transpose; m2_dim = (iv2){{20, 20}}; }break;
    117 		}
    118 		if (m2) kronecker_product(result, m, (iv2){{dim, dim}}, m2, m2_dim);
    119 	}
    120 
    121 	return result;
    122 }
    123 
    124 function b32
    125 u128_equal(u128 a, u128 b)
    126 {
    127 	b32 result = a.U64[0] == b.U64[0] && a.U64[1] == b.U64[1];
    128 	return result;
    129 }
    130 
    131 function RangeU64
    132 subrange_n_from_n_m_count(u64 n, u64 n_count, u64 m)
    133 {
    134 	assert(n < n_count);
    135 
    136 	u64 per_lane            = m / n_count;
    137 	u64 leftover            = m - per_lane * n_count;
    138 	u64 leftovers_before_n  = MIN(leftover, n);
    139 	u64 base_index          = n * per_lane + leftovers_before_n;
    140 	u64 one_past_last_index = base_index + per_lane + ((n < leftover) ? 1 : 0);
    141 
    142 	RangeU64 result = {base_index, one_past_last_index};
    143 	return result;
    144 }
    145 
    146 function b32
    147 iv2_equal(iv2 a, iv2 b)
    148 {
    149 	b32 result = a.x == b.x && a.y == b.y;
    150 	return result;
    151 }
    152 
    153 function b32
    154 iv3_equal(iv3 a, iv3 b)
    155 {
    156 	b32 result = a.x == b.x && a.y == b.y && a.z == b.z;
    157 	return result;
    158 }
    159 
    160 function i32
    161 iv3_dimension(iv3 points)
    162 {
    163 	i32 result = (points.x > 1) + (points.y > 1) + (points.z > 1);
    164 	return result;
    165 }
    166 
    167 function v2
    168 clamp_v2_rect(v2 v, Rect r)
    169 {
    170 	v2 result = v;
    171 	result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x);
    172 	result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y);
    173 	return result;
    174 }
    175 
    176 function v2
    177 v2_from_iv2(iv2 v)
    178 {
    179 	v2 result;
    180 	result.E[0] = (f32)v.E[0];
    181 	result.E[1] = (f32)v.E[1];
    182 	return result;
    183 }
    184 
    185 function v2
    186 v2_abs(v2 a)
    187 {
    188 	v2 result;
    189 	result.x = Abs(a.x);
    190 	result.y = Abs(a.y);
    191 	return result;
    192 }
    193 
    194 function v2
    195 v2_scale(v2 a, f32 scale)
    196 {
    197 	v2 result;
    198 	result.x = a.x * scale;
    199 	result.y = a.y * scale;
    200 	return result;
    201 }
    202 
    203 function v2
    204 v2_add(v2 a, v2 b)
    205 {
    206 	v2 result;
    207 	result.x = a.x + b.x;
    208 	result.y = a.y + b.y;
    209 	return result;
    210 }
    211 
    212 function v2
    213 v2_sub(v2 a, v2 b)
    214 {
    215 	v2 result = v2_add(a, v2_scale(b, -1.0f));
    216 	return result;
    217 }
    218 
    219 function v2
    220 v2_mul(v2 a, v2 b)
    221 {
    222 	v2 result;
    223 	result.x = a.x * b.x;
    224 	result.y = a.y * b.y;
    225 	return result;
    226 }
    227 
    228 function v2
    229 v2_div(v2 a, v2 b)
    230 {
    231 	v2 result;
    232 	result.x = a.x / b.x;
    233 	result.y = a.y / b.y;
    234 	return result;
    235 }
    236 
    237 function v2
    238 v2_floor(v2 a)
    239 {
    240 	v2 result;
    241 	result.x = (f32)((i32)a.x);
    242 	result.y = (f32)((i32)a.y);
    243 	return result;
    244 }
    245 
    246 function f32
    247 v2_magnitude_squared(v2 a)
    248 {
    249 	f32 result = a.x * a.x + a.y * a.y;
    250 	return result;
    251 }
    252 
    253 function f32
    254 v2_magnitude(v2 a)
    255 {
    256 	f32 result = sqrt_f32(a.x * a.x + a.y * a.y);
    257 	return result;
    258 }
    259 
    260 function v3
    261 cross(v3 a, v3 b)
    262 {
    263 	v3 result;
    264 	result.x = a.y * b.z - a.z * b.y;
    265 	result.y = a.z * b.x - a.x * b.z;
    266 	result.z = a.x * b.y - a.y * b.x;
    267 	return result;
    268 }
    269 
    270 function v3
    271 v3_from_iv3(iv3 v)
    272 {
    273 	v3 result;
    274 	result.E[0] = (f32)v.E[0];
    275 	result.E[1] = (f32)v.E[1];
    276 	result.E[2] = (f32)v.E[2];
    277 	return result;
    278 }
    279 
    280 function v3
    281 v3_abs(v3 a)
    282 {
    283 	v3 result;
    284 	result.x = Abs(a.x);
    285 	result.y = Abs(a.y);
    286 	result.z = Abs(a.z);
    287 	return result;
    288 }
    289 
    290 function v3
    291 v3_scale(v3 a, f32 scale)
    292 {
    293 	v3 result;
    294 	result.x = scale * a.x;
    295 	result.y = scale * a.y;
    296 	result.z = scale * a.z;
    297 	return result;
    298 }
    299 
    300 function v3
    301 v3_add(v3 a, v3 b)
    302 {
    303 	v3 result;
    304 	result.x = a.x + b.x;
    305 	result.y = a.y + b.y;
    306 	result.z = a.z + b.z;
    307 	return result;
    308 }
    309 
    310 function v3
    311 v3_sub(v3 a, v3 b)
    312 {
    313 	v3 result = v3_add(a, v3_scale(b, -1.0f));
    314 	return result;
    315 }
    316 
    317 function v3
    318 v3_div(v3 a, v3 b)
    319 {
    320 	v3 result;
    321 	result.x = a.x / b.x;
    322 	result.y = a.y / b.y;
    323 	result.z = a.z / b.z;
    324 	return result;
    325 }
    326 
    327 function f32
    328 v3_dot(v3 a, v3 b)
    329 {
    330 	f32 result = a.x * b.x + a.y * b.y + a.z * b.z;
    331 	return result;
    332 }
    333 
    334 function f32
    335 v3_magnitude_squared(v3 a)
    336 {
    337 	f32 result = v3_dot(a, a);
    338 	return result;
    339 }
    340 
    341 function f32
    342 v3_magnitude(v3 a)
    343 {
    344 	f32 result = sqrt_f32(v3_dot(a, a));
    345 	return result;
    346 }
    347 
    348 function v3
    349 v3_normalize(v3 a)
    350 {
    351 	v3 result = v3_scale(a, 1.0f / v3_magnitude(a));
    352 	return result;
    353 }
    354 
    355 function v4
    356 v4_scale(v4 a, f32 scale)
    357 {
    358 	v4 result;
    359 	result.x = scale * a.x;
    360 	result.y = scale * a.y;
    361 	result.z = scale * a.z;
    362 	result.w = scale * a.w;
    363 	return result;
    364 }
    365 
    366 function v4
    367 v4_add(v4 a, v4 b)
    368 {
    369 	v4 result;
    370 	result.x = a.x + b.x;
    371 	result.y = a.y + b.y;
    372 	result.z = a.z + b.z;
    373 	result.w = a.w + b.w;
    374 	return result;
    375 }
    376 
    377 function v4
    378 v4_sub(v4 a, v4 b)
    379 {
    380 	v4 result = v4_add(a, v4_scale(b, -1));
    381 	return result;
    382 }
    383 
    384 function f32
    385 v4_dot(v4 a, v4 b)
    386 {
    387 	f32 result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
    388 	return result;
    389 }
    390 
    391 function v4
    392 v4_lerp(v4 a, v4 b, f32 t)
    393 {
    394 	v4 result = v4_add(a, v4_scale(v4_sub(b, a), t));
    395 	return result;
    396 }
    397 
    398 function m4
    399 m4_identity(void)
    400 {
    401 	m4 result;
    402 	result.c[0] = (v4){{1, 0, 0, 0}};
    403 	result.c[1] = (v4){{0, 1, 0, 0}};
    404 	result.c[2] = (v4){{0, 0, 1, 0}};
    405 	result.c[3] = (v4){{0, 0, 0, 1}};
    406 	return result;
    407 }
    408 
    409 function v4
    410 m4_row(m4 a, u32 row)
    411 {
    412 	v4 result;
    413 	result.E[0] = a.c[0].E[row];
    414 	result.E[1] = a.c[1].E[row];
    415 	result.E[2] = a.c[2].E[row];
    416 	result.E[3] = a.c[3].E[row];
    417 	return result;
    418 }
    419 
    420 function m4
    421 m4_mul(m4 a, m4 b)
    422 {
    423 	m4 result;
    424 	for (u32 i = 0; i < 4; i++) {
    425 		for (u32 j = 0; j < 4; j++) {
    426 			result.c[i].E[j] = v4_dot(m4_row(a, j), b.c[i]);
    427 		}
    428 	}
    429 	return result;
    430 }
    431 
    432 /* NOTE(rnp): based on:
    433  * https://web.archive.org/web/20131215123403/ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf
    434  * TODO(rnp): redo with SIMD as given in the link (but need to rewrite for column-major)
    435  */
    436 function m4
    437 m4_inverse(m4 m)
    438 {
    439 	m4 result;
    440 	result.E[ 0] =  m.E[5] * m.E[10] * m.E[15] - m.E[5] * m.E[11] * m.E[14] - m.E[9] * m.E[6] * m.E[15] + m.E[9] * m.E[7] * m.E[14] + m.E[13] * m.E[6] * m.E[11] - m.E[13] * m.E[7] * m.E[10];
    441 	result.E[ 4] = -m.E[4] * m.E[10] * m.E[15] + m.E[4] * m.E[11] * m.E[14] + m.E[8] * m.E[6] * m.E[15] - m.E[8] * m.E[7] * m.E[14] - m.E[12] * m.E[6] * m.E[11] + m.E[12] * m.E[7] * m.E[10];
    442 	result.E[ 8] =  m.E[4] * m.E[ 9] * m.E[15] - m.E[4] * m.E[11] * m.E[13] - m.E[8] * m.E[5] * m.E[15] + m.E[8] * m.E[7] * m.E[13] + m.E[12] * m.E[5] * m.E[11] - m.E[12] * m.E[7] * m.E[ 9];
    443 	result.E[12] = -m.E[4] * m.E[ 9] * m.E[14] + m.E[4] * m.E[10] * m.E[13] + m.E[8] * m.E[5] * m.E[14] - m.E[8] * m.E[6] * m.E[13] - m.E[12] * m.E[5] * m.E[10] + m.E[12] * m.E[6] * m.E[ 9];
    444 	result.E[ 1] = -m.E[1] * m.E[10] * m.E[15] + m.E[1] * m.E[11] * m.E[14] + m.E[9] * m.E[2] * m.E[15] - m.E[9] * m.E[3] * m.E[14] - m.E[13] * m.E[2] * m.E[11] + m.E[13] * m.E[3] * m.E[10];
    445 	result.E[ 5] =  m.E[0] * m.E[10] * m.E[15] - m.E[0] * m.E[11] * m.E[14] - m.E[8] * m.E[2] * m.E[15] + m.E[8] * m.E[3] * m.E[14] + m.E[12] * m.E[2] * m.E[11] - m.E[12] * m.E[3] * m.E[10];
    446 	result.E[ 9] = -m.E[0] * m.E[ 9] * m.E[15] + m.E[0] * m.E[11] * m.E[13] + m.E[8] * m.E[1] * m.E[15] - m.E[8] * m.E[3] * m.E[13] - m.E[12] * m.E[1] * m.E[11] + m.E[12] * m.E[3] * m.E[ 9];
    447 	result.E[13] =  m.E[0] * m.E[ 9] * m.E[14] - m.E[0] * m.E[10] * m.E[13] - m.E[8] * m.E[1] * m.E[14] + m.E[8] * m.E[2] * m.E[13] + m.E[12] * m.E[1] * m.E[10] - m.E[12] * m.E[2] * m.E[ 9];
    448 	result.E[ 2] =  m.E[1] * m.E[ 6] * m.E[15] - m.E[1] * m.E[ 7] * m.E[14] - m.E[5] * m.E[2] * m.E[15] + m.E[5] * m.E[3] * m.E[14] + m.E[13] * m.E[2] * m.E[ 7] - m.E[13] * m.E[3] * m.E[ 6];
    449 	result.E[ 6] = -m.E[0] * m.E[ 6] * m.E[15] + m.E[0] * m.E[ 7] * m.E[14] + m.E[4] * m.E[2] * m.E[15] - m.E[4] * m.E[3] * m.E[14] - m.E[12] * m.E[2] * m.E[ 7] + m.E[12] * m.E[3] * m.E[ 6];
    450 	result.E[10] =  m.E[0] * m.E[ 5] * m.E[15] - m.E[0] * m.E[ 7] * m.E[13] - m.E[4] * m.E[1] * m.E[15] + m.E[4] * m.E[3] * m.E[13] + m.E[12] * m.E[1] * m.E[ 7] - m.E[12] * m.E[3] * m.E[ 5];
    451 	result.E[14] = -m.E[0] * m.E[ 5] * m.E[14] + m.E[0] * m.E[ 6] * m.E[13] + m.E[4] * m.E[1] * m.E[14] - m.E[4] * m.E[2] * m.E[13] - m.E[12] * m.E[1] * m.E[ 6] + m.E[12] * m.E[2] * m.E[ 5];
    452 	result.E[ 3] = -m.E[1] * m.E[ 6] * m.E[11] + m.E[1] * m.E[ 7] * m.E[10] + m.E[5] * m.E[2] * m.E[11] - m.E[5] * m.E[3] * m.E[10] - m.E[ 9] * m.E[2] * m.E[ 7] + m.E[ 9] * m.E[3] * m.E[ 6];
    453 	result.E[ 7] =  m.E[0] * m.E[ 6] * m.E[11] - m.E[0] * m.E[ 7] * m.E[10] - m.E[4] * m.E[2] * m.E[11] + m.E[4] * m.E[3] * m.E[10] + m.E[ 8] * m.E[2] * m.E[ 7] - m.E[ 8] * m.E[3] * m.E[ 6];
    454 	result.E[11] = -m.E[0] * m.E[ 5] * m.E[11] + m.E[0] * m.E[ 7] * m.E[ 9] + m.E[4] * m.E[1] * m.E[11] - m.E[4] * m.E[3] * m.E[ 9] - m.E[ 8] * m.E[1] * m.E[ 7] + m.E[ 8] * m.E[3] * m.E[ 5];
    455 	result.E[15] =  m.E[0] * m.E[ 5] * m.E[10] - m.E[0] * m.E[ 6] * m.E[ 9] - m.E[4] * m.E[1] * m.E[10] + m.E[4] * m.E[2] * m.E[ 9] + m.E[ 8] * m.E[1] * m.E[ 6] - m.E[ 8] * m.E[2] * m.E[ 5];
    456 
    457 	f32 determinant = m.E[0] * result.E[0] + m.E[1] * result.E[4] + m.E[2] * result.E[8] + m.E[3] * result.E[12];
    458 	determinant = 1.0f / determinant;
    459 	for(i32 i = 0; i < 16; i++)
    460 		result.E[i] *= determinant;
    461 	return result;
    462 }
    463 
    464 function m4
    465 m4_translation(v3 delta)
    466 {
    467 	m4 result;
    468 	result.c[0] = (v4){{1, 0, 0, 0}};
    469 	result.c[1] = (v4){{0, 1, 0, 0}};
    470 	result.c[2] = (v4){{0, 0, 1, 0}};
    471 	result.c[3] = (v4){{delta.x, delta.y, delta.z, 1}};
    472 	return result;
    473 }
    474 
    475 function m4
    476 m4_scale(v3 scale)
    477 {
    478 	m4 result;
    479 	result.c[0] = (v4){{scale.x, 0,       0,       0}};
    480 	result.c[1] = (v4){{0,       scale.y, 0,       0}};
    481 	result.c[2] = (v4){{0,       0,       scale.z, 0}};
    482 	result.c[3] = (v4){{0,       0,       0,       1}};
    483 	return result;
    484 }
    485 
    486 function m4
    487 m4_rotation_about_axis(v3 axis, f32 turns)
    488 {
    489 	assert(f32_equal(v3_magnitude_squared(axis), 1.0f));
    490 	f32 sa  = sin_f32(turns * 2 * PI);
    491 	f32 ca  = cos_f32(turns * 2 * PI);
    492 	f32 mca = 1.0f - ca;
    493 
    494 	f32 x = axis.x, x2 = x * x;
    495 	f32 y = axis.y, y2 = y * y;
    496 	f32 z = axis.z, z2 = z * z;
    497 
    498 	m4 result;
    499 	result.c[0] = (v4){{ca + mca * x2,        mca * x * y - sa * z, mca * x * z + sa * y, 0}};
    500 	result.c[1] = (v4){{mca * x * y + sa * z, ca + mca * y2,        mca * y * z - sa * x, 0}};
    501 	result.c[2] = (v4){{mca * x * z - sa * y, mca * y * z + sa * x, ca + mca * z2,        0}};
    502 	result.c[3] = (v4){{0, 0, 0, 1}};
    503 	return result;
    504 }
    505 
    506 function m4
    507 m4_rotation_about_z(f32 turns)
    508 {
    509 	m4 result = m4_rotation_about_axis((v3){{0, 0, 1.0f}}, turns);
    510 	return result;
    511 }
    512 
    513 function m4
    514 m4_rotation_about_y(f32 turns)
    515 {
    516 	m4 result = m4_rotation_about_axis((v3){{0, 1.0f, 0}}, turns);
    517 	return result;
    518 }
    519 
    520 function m4
    521 y_aligned_volume_transform(v3 extent, v3 translation, f32 rotation_turns)
    522 {
    523 	m4 T = m4_translation(translation);
    524 	m4 R = m4_rotation_about_y(rotation_turns);
    525 	m4 S = m4_scale(extent);
    526 	m4 result = m4_mul(T, m4_mul(R, S));
    527 	return result;
    528 }
    529 
    530 function v4
    531 m4_mul_v4(m4 a, v4 v)
    532 {
    533 	v4 result;
    534 	result.x = v4_dot(m4_row(a, 0), v);
    535 	result.y = v4_dot(m4_row(a, 1), v);
    536 	result.z = v4_dot(m4_row(a, 2), v);
    537 	result.w = v4_dot(m4_row(a, 3), v);
    538 	return result;
    539 }
    540 
    541 function m4
    542 orthographic_projection(f32 n, f32 f, f32 t, f32 r)
    543 {
    544 	m4 result;
    545 	f32 a = -2 / (f - n);
    546 	f32 b = - (f + n) / (f - n);
    547 	result.c[0] = (v4){{1 / r, 0,     0,  0}};
    548 	result.c[1] = (v4){{0,     1 / t, 0,  0}};
    549 	result.c[2] = (v4){{0,     0,     a,  0}};
    550 	result.c[3] = (v4){{0,     0,     b,  1}};
    551 	return result;
    552 }
    553 
    554 function m4
    555 perspective_projection(f32 n, f32 f, f32 fov, f32 aspect)
    556 {
    557 	m4 result;
    558 	f32 t = tan_f32(fov / 2.0f);
    559 	f32 r = t * aspect;
    560 	f32 a = -(f + n) / (f - n);
    561 	f32 b = -2 * f * n / (f - n);
    562 	result.c[0] = (v4){{1 / r, 0,     0,  0}};
    563 	result.c[1] = (v4){{0,     1 / t, 0,  0}};
    564 	result.c[2] = (v4){{0,     0,     a, -1}};
    565 	result.c[3] = (v4){{0,     0,     b,  0}};
    566 	return result;
    567 }
    568 
    569 function m4
    570 camera_look_at(v3 camera, v3 point)
    571 {
    572 	v3 orthogonal = {{0, 1.0f, 0}};
    573 	v3 normal     = v3_normalize(v3_sub(camera, point));
    574 	v3 right      = cross(orthogonal, normal);
    575 	v3 up         = cross(normal,     right);
    576 
    577 	v3 translate;
    578 	camera      = v3_sub((v3){0}, camera);
    579 	translate.x = v3_dot(camera, right);
    580 	translate.y = v3_dot(camera, up);
    581 	translate.z = v3_dot(camera, normal);
    582 
    583 	m4 result;
    584 	result.c[0] = (v4){{right.x,     up.x,        normal.x,    0}};
    585 	result.c[1] = (v4){{right.y,     up.y,        normal.y,    0}};
    586 	result.c[2] = (v4){{right.z,     up.z,        normal.z,    0}};
    587 	result.c[3] = (v4){{translate.x, translate.y, translate.z, 1}};
    588 	return result;
    589 }
    590 
    591 /* NOTE(rnp): adapted from "Essential Mathematics for Games and Interactive Applications" (Verth, Bishop) */
    592 function f32
    593 obb_raycast(m4 obb_orientation, v3 obb_size, v3 obb_center, ray r)
    594 {
    595 	v3 p = v3_sub(obb_center, r.origin);
    596 	v3 X = obb_orientation.c[0].xyz;
    597 	v3 Y = obb_orientation.c[1].xyz;
    598 	v3 Z = obb_orientation.c[2].xyz;
    599 
    600 	/* NOTE(rnp): projects direction vector onto OBB axis */
    601 	v3 f;
    602 	f.x = v3_dot(X, r.direction);
    603 	f.y = v3_dot(Y, r.direction);
    604 	f.z = v3_dot(Z, r.direction);
    605 
    606 	/* NOTE(rnp): projects relative vector onto OBB axis */
    607 	v3 e;
    608 	e.x = v3_dot(X, p);
    609 	e.y = v3_dot(Y, p);
    610 	e.z = v3_dot(Z, p);
    611 
    612 	f32 result = 0;
    613 	f32 t[6] = {0};
    614 	for (i32 i = 0; i < 3; i++) {
    615 		if (f32_equal(f.E[i], 0)) {
    616 			if (-e.E[i] - obb_size.E[i] > 0 || -e.E[i] + obb_size.E[i] < 0)
    617 				result = -1.0f;
    618 			f.E[i] = F32_EPSILON;
    619 		}
    620 		t[i * 2 + 0] = (e.E[i] + obb_size.E[i]) / f.E[i];
    621 		t[i * 2 + 1] = (e.E[i] - obb_size.E[i]) / f.E[i];
    622 	}
    623 
    624 	if (result != -1) {
    625 		f32 tmin = MAX(MAX(MIN(t[0], t[1]), MIN(t[2], t[3])), MIN(t[4], t[5]));
    626 		f32 tmax = MIN(MIN(MAX(t[0], t[1]), MAX(t[2], t[3])), MAX(t[4], t[5]));
    627 		if (tmax >= 0 && tmin <= tmax) {
    628 			result = tmin > 0 ? tmin : tmax;
    629 		} else {
    630 			result = -1;
    631 		}
    632 	}
    633 
    634 	return result;
    635 }
    636 
    637 function f32
    638 complex_filter_first_moment(v2 *filter, i32 length, f32 sampling_frequency)
    639 {
    640 	f32 n = 0, d = 0;
    641 	for (i32 i = 0; i < length; i++) {
    642 		f32 t = v2_magnitude_squared(filter[i]);
    643 		n += (f32)i * t;
    644 		d += t;
    645 	}
    646 	f32 result = n / d / sampling_frequency;
    647 	return result;
    648 }
    649 
    650 function f32
    651 real_filter_first_moment(f32 *filter, i32 length, f32 sampling_frequency)
    652 {
    653 	f32 n = 0, d = 0;
    654 	for (i32 i = 0; i < length; i++) {
    655 		f32 t = filter[i] * filter[i];
    656 		n += (f32)i * t;
    657 		d += t;
    658 	}
    659 	f32 result = n / d / sampling_frequency;
    660 	return result;
    661 }
    662 
    663 function f32
    664 tukey_window(f32 t, f32 tapering)
    665 {
    666 	f32 r = tapering;
    667 	f32 result = 1;
    668 	if (t < r / 2)      result = 0.5f * (1 + cos_f32(2 * PI * (t - r / 2)     / r));
    669 	if (t >= 1 - r / 2) result = 0.5f * (1 + cos_f32(2 * PI * (t - 1 + r / 2) / r));
    670 	return result;
    671 }
    672 
    673 /* NOTE(rnp): adapted from "Discrete Time Signal Processing" (Oppenheim) */
    674 function f32 *
    675 kaiser_low_pass_filter(Arena *arena, f32 cutoff_frequency, f32 sampling_frequency, f32 beta, i32 length)
    676 {
    677 	f32 *result = push_array(arena, f32, length);
    678 	f32 wc      = 2 * PI * cutoff_frequency / sampling_frequency;
    679 	f32 a       = (f32)length / 2.0f;
    680 	f32 pi_i0_b = PI * (f32)cephes_i0(beta);
    681 
    682 	for (i32 n = 0; n < length; n++) {
    683 		f32 t       = (f32)n - a;
    684 		f32 impulse = !f32_equal(t, 0) ? sin_f32(wc * t) / t : wc;
    685 		t           = t / a;
    686 		f32 window  = (f32)cephes_i0(beta * sqrt_f32(1 - t * t)) / pi_i0_b;
    687 		result[n]   = impulse * window;
    688 	}
    689 
    690 	return result;
    691 }
    692 
    693 function f32 *
    694 rf_chirp(Arena *arena, f32 min_frequency, f32 max_frequency, f32 sampling_frequency,
    695          i32 length, b32 reverse)
    696 {
    697 	f32 *result = push_array(arena, f32, length);
    698 	for (i32 i = 0; i < length; i++) {
    699 		i32 index = reverse? length - 1 - i : i;
    700 		f32 fc    = min_frequency + (f32)i * (max_frequency - min_frequency) / (2 * (f32)length);
    701 		f32 arg   = 2 * PI * fc * (f32)i / sampling_frequency;
    702 		result[index] = sin_f32(arg) * tukey_window((f32)i / (f32)length, 0.2f);
    703 	}
    704 	return result;
    705 }
    706 
    707 function v2 *
    708 baseband_chirp(Arena *arena, f32 min_frequency, f32 max_frequency, f32 sampling_frequency,
    709                i32 length, b32 reverse, f32 scale)
    710 {
    711 	v2 *result    = push_array(arena, v2, length);
    712 	f32 conjugate = reverse ? -1 : 1;
    713 	for (i32 i = 0; i < length; i++) {
    714 		i32 index = reverse? length - 1 - i : i;
    715 		f32 fc    = min_frequency + (f32)i * (max_frequency - min_frequency) / (2 * (f32)length);
    716 		f32 arg   = 2 * PI * fc * (f32)i / sampling_frequency;
    717 		v2 sample = {{scale * cos_f32(arg), conjugate * scale * sin_f32(arg)}};
    718 		result[index] = v2_scale(sample, tukey_window((f32)i / (f32)length, 0.2f));
    719 	}
    720 	return result;
    721 }
    722 
    723 function iv3
    724 das_output_dimension(iv3 points)
    725 {
    726 	iv3 result;
    727 	result.x = Max(points.x, 1);
    728 	result.y = Max(points.y, 1);
    729 	result.z = Max(points.z, 1);
    730 
    731 	switch (iv3_dimension(result)) {
    732 	case 1:{
    733 		if (result.y > 1) result.x = result.y;
    734 		if (result.z > 1) result.x = result.z;
    735 		result.y = result.z = 1;
    736 	}break;
    737 
    738 	case 2:{
    739 		if (result.x > 1) {
    740 			if (result.z > 1) result.y = result.z;
    741 		} else {
    742 			result.x = result.z;
    743 		}
    744 		result.z = 1;
    745 	}break;
    746 
    747 	case 3:{}break;
    748 
    749 	InvalidDefaultCase;
    750 	}
    751 
    752 	return result;
    753 }
    754 
    755 function m4
    756 das_transform_1d(v3 p1, v3 p2)
    757 {
    758 	v3 extent = v3_sub(p2, p1);
    759 	m4 result = {
    760 		.c[0] = (v4){{extent.x, extent.y, extent.z, 0.0f}},
    761 		.c[1] = (v4){{0.0f, 0.0f, 0.0f, 0.0f}},
    762 		.c[2] = (v4){{0.0f, 0.0f, 0.0f, 0.0f}},
    763 		.c[3] = (v4){{p1.x, p1.y, p1.z, 1.0f}},
    764 	};
    765 	return result;
    766 }
    767 
    768 function m4
    769 das_transform_2d_xz(v2 min_coordinate, v2 max_coordinate, f32 y_off)
    770 {
    771 	v2 extent = v2_sub(max_coordinate, min_coordinate);
    772 
    773 	m4 result;
    774 	result.c[0] = (v4){{extent.x,         0.0f,  0.0f,             0.0f}};
    775 	result.c[1] = (v4){{0.0f,             0.0f,  extent.y,         0.0f}};
    776 	result.c[2] = (v4){{0.0f,             1.0f,  0.0f,             0.0f}};
    777 	result.c[3] = (v4){{min_coordinate.x, y_off, min_coordinate.y, 1.0f}};
    778 	return result;
    779 }
    780 
    781 function m4
    782 das_transform_2d_yz(v2 min_coordinate, v2 max_coordinate, f32 x_off)
    783 {
    784 	v2 extent = v2_sub(max_coordinate, min_coordinate);
    785 
    786 	m4 result;
    787 	result.c[0] = (v4){{0.0f,  extent.x,         0.0f,             0.0f}};
    788 	result.c[1] = (v4){{0.0f,  0.0f,             extent.y,         0.0f}};
    789 	result.c[2] = (v4){{1.0f,  0.0f,             0.0f,             0.0f}};
    790 	result.c[3] = (v4){{x_off, min_coordinate.x, min_coordinate.y, 1.0f}};
    791 	return result;
    792 }
    793 
    794 function m4
    795 das_transform_2d_xy(v2 min_coordinate, v2 max_coordinate, f32 z_off)
    796 {
    797 	v2 extent = v2_sub(max_coordinate, min_coordinate);
    798 
    799 	m4 result;
    800 	result.c[0] = (v4){{extent.x,         0.0f,             0.0f,  0.0f}};
    801 	result.c[1] = (v4){{0.0f,             extent.y,         0.0f,  0.0f}};
    802 	result.c[2] = (v4){{0.0f,             0.0f,             1.0f,  0.0f}};
    803 	result.c[3] = (v4){{min_coordinate.x, min_coordinate.y, z_off, 1.0f}};
    804 	return result;
    805 }
    806 
    807 function m4
    808 das_transform_3d(v3 min_coordinate, v3 max_coordinate)
    809 {
    810 	v3 extent = v3_sub(max_coordinate, min_coordinate);
    811 	m4 result;
    812 	result.c[0] = (v4){{extent.x,         0.0f,             0.0f,             0.0f}};
    813 	result.c[1] = (v4){{0.0f,             extent.y,         0.0f,             0.0f}};
    814 	result.c[2] = (v4){{0.0f,             0.0f,             extent.z,         0.0f}};
    815 	result.c[3] = (v4){{min_coordinate.x, min_coordinate.y, min_coordinate.z, 1.0f}};
    816 	return result;
    817 }
    818 
    819 function m4
    820 das_transform(v3 min_coordinate, v3 max_coordinate, iv3 *points)
    821 {
    822 	m4 result;
    823 
    824 	*points = das_output_dimension(*points);
    825 
    826 	switch (iv3_dimension(*points)) {
    827 	case 1:{result = das_transform_1d(      min_coordinate,     max_coordinate);    }break;
    828 	case 2:{result = das_transform_2d_xz(XY(min_coordinate), XY(max_coordinate), 0);}break;
    829 	case 3:{result = das_transform_3d(      min_coordinate,     max_coordinate);    }break;
    830 	}
    831 
    832 	return result;
    833 }
    834 
    835 function v2
    836 plane_uv(v3 point, v3 U, v3 V)
    837 {
    838 	v2 result;
    839 	result.x = v3_dot(U, point) / v3_dot(U, U);
    840 	result.y = v3_dot(V, point) / v3_dot(V, V);
    841 	return result;
    842 }
    843 
    844 function v4
    845 hsv_to_rgb(v4 hsv)
    846 {
    847 	/* f(k(n))   = V - V*S*max(0, min(k, min(4 - k, 1)))
    848 	 * k(n)      = fmod((n + H * 6), 6)
    849 	 * (R, G, B) = (f(n = 5), f(n = 3), f(n = 1))
    850 	 */
    851 	alignas(16) f32 nval[4] = {5.0f, 3.0f, 1.0f, 0.0f};
    852 	f32x4 n   = load_f32x4(nval);
    853 	f32x4 H   = dup_f32x4(hsv.x);
    854 	f32x4 S   = dup_f32x4(hsv.y);
    855 	f32x4 V   = dup_f32x4(hsv.z);
    856 	f32x4 six = dup_f32x4(6);
    857 
    858 	f32x4 t   = add_f32x4(n, mul_f32x4(six, H));
    859 	f32x4 rem = floor_f32x4(div_f32x4(t, six));
    860 	f32x4 k   = sub_f32x4(t, mul_f32x4(rem, six));
    861 
    862 	t = min_f32x4(sub_f32x4(dup_f32x4(4), k), dup_f32x4(1));
    863 	t = max_f32x4(dup_f32x4(0), min_f32x4(k, t));
    864 	t = mul_f32x4(t, mul_f32x4(S, V));
    865 
    866 	v4 rgba;
    867 	store_f32x4(rgba.E, sub_f32x4(V, t));
    868 	rgba.a = hsv.a;
    869 	return rgba;
    870 }
    871 
    872 function f32
    873 ease_in_out_cubic(f32 t)
    874 {
    875 	f32 result;
    876 	if (t < 0.5f) {
    877 		result = 4.0f * t * t * t;
    878 	} else {
    879 		t      = -2.0f * t + 2.0f;
    880 		result =  1.0f - t * t * t / 2.0f;
    881 	}
    882 	return result;
    883 }
    884 
    885 function f32
    886 ease_in_out_quartic(f32 t)
    887 {
    888 	f32 result;
    889 	if (t < 0.5f) {
    890 		result = 8.0f * t * t * t * t;
    891 	} else {
    892 		t      = -2.0f * t + 2.0f;
    893 		result =  1.0f - t * t * t * t / 2.0f;
    894 	}
    895 	return result;
    896 }