ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

math.c (19867B)


      1 #include "external/cephes.c"
      2 
      3 function void
      4 fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, iv2 b_dim)
      5 {
      6 	f32x4 vscale = dup_f32x4((f32)scale);
      7 	for (i32 i = 0; i < b_dim.y; i++) {
      8 		for (i32 j = 0; j < b_dim.x; j += 4, b += 4) {
      9 			f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b));
     10 			store_i32x4(out + j, cvt_f32x4_i32x4(mul_f32x4(vscale, vb)));
     11 		}
     12 		out += out_stride;
     13 	}
     14 }
     15 
     16 /* NOTE: this won't check for valid space/etc and assumes row major order */
     17 function void
     18 kronecker_product(i32 *out, i32 *a, iv2 a_dim, i32 *b, iv2 b_dim)
     19 {
     20 	iv2 out_dim = {{a_dim.x * b_dim.x, a_dim.y * b_dim.y}};
     21 	assert(out_dim.y % 4 == 0);
     22 	for (i32 i = 0; i < a_dim.y; i++) {
     23 		i32 *vout = out;
     24 		for (i32 j = 0; j < a_dim.x; j++, a++) {
     25 			fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim);
     26 			vout += b_dim.y;
     27 		}
     28 		out += out_dim.y * b_dim.x;
     29 	}
     30 }
     31 
     32 /* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
     33 function i32 *
     34 make_hadamard_transpose(Arena *a, i32 dim)
     35 {
     36 	read_only local_persist	i32 hadamard_12_12_transpose[] = {
     37 		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     38 		1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,
     39 		1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,
     40 		1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
     41 		1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,
     42 		1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,
     43 		1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,
     44 		1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1,
     45 		1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1,
     46 		1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
     47 		1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,
     48 		1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1,
     49 	};
     50 
     51 	read_only local_persist i32 hadamard_20_20_transpose[] = {
     52 		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     53 		1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1,
     54 		1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1,
     55 		1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,
     56 		1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,
     57 		1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1,
     58 		1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1,
     59 		1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1,
     60 		1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1,
     61 		1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,
     62 		1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1,
     63 		1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,
     64 		1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1,
     65 		1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,
     66 		1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,
     67 		1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,
     68 		1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,
     69 		1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1,
     70 		1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1,
     71 		1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,
     72 	};
     73 
     74 	i32 *result = 0;
     75 
     76 	b32 power_of_2     = ISPOWEROF2(dim);
     77 	b32 multiple_of_12 = dim % 12 == 0;
     78 	b32 multiple_of_20 = dim % 20 == 0;
     79 	iz elements        = dim * dim;
     80 
     81 	i32 base_dim = 0;
     82 	if (power_of_2) {
     83 		base_dim  = dim;
     84 	} else if (multiple_of_20 && ISPOWEROF2(dim / 20)) {
     85 		base_dim  = 20;
     86 		dim      /= 20;
     87 	} else if (multiple_of_12 && ISPOWEROF2(dim / 12)) {
     88 		base_dim  = 12;
     89 		dim      /= 12;
     90 	}
     91 
     92 	if (ISPOWEROF2(dim) && base_dim && arena_capacity(a, i32) >= elements * (1 + (dim != base_dim))) {
     93 		result = push_array(a, i32, elements);
     94 
     95 		Arena tmp = *a;
     96 		i32 *m = dim == base_dim ? result : push_array(&tmp, i32, elements);
     97 
     98 		#define IND(i, j) ((i) * dim + (j))
     99 		m[0] = 1;
    100 		for (i32 k = 1; k < dim; k *= 2) {
    101 			for (i32 i = 0; i < k; i++) {
    102 				for (i32 j = 0; j < k; j++) {
    103 					i32 val = m[IND(i, j)];
    104 					m[IND(i + k, j)]     =  val;
    105 					m[IND(i, j + k)]     =  val;
    106 					m[IND(i + k, j + k)] = -val;
    107 				}
    108 			}
    109 		}
    110 		#undef IND
    111 
    112 		i32 *m2 = 0;
    113 		iv2 m2_dim;
    114 		switch (base_dim) {
    115 		case 12:{ m2 = hadamard_12_12_transpose; m2_dim = (iv2){{12, 12}}; }break;
    116 		case 20:{ m2 = hadamard_20_20_transpose; m2_dim = (iv2){{20, 20}}; }break;
    117 		}
    118 		if (m2) kronecker_product(result, m, (iv2){{dim, dim}}, m2, m2_dim);
    119 	}
    120 
    121 	return result;
    122 }
    123 
    124 function b32
    125 iv2_equal(iv2 a, iv2 b)
    126 {
    127 	b32 result = a.x == b.x && a.y == b.y;
    128 	return result;
    129 }
    130 
    131 function b32
    132 iv3_equal(iv3 a, iv3 b)
    133 {
    134 	b32 result = a.x == b.x && a.y == b.y && a.z == b.z;
    135 	return result;
    136 }
    137 
    138 function v2
    139 clamp_v2_rect(v2 v, Rect r)
    140 {
    141 	v2 result = v;
    142 	result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x);
    143 	result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y);
    144 	return result;
    145 }
    146 
    147 function v2
    148 v2_scale(v2 a, f32 scale)
    149 {
    150 	v2 result;
    151 	result.x = a.x * scale;
    152 	result.y = a.y * scale;
    153 	return result;
    154 }
    155 
    156 function v2
    157 v2_add(v2 a, v2 b)
    158 {
    159 	v2 result;
    160 	result.x = a.x + b.x;
    161 	result.y = a.y + b.y;
    162 	return result;
    163 }
    164 
    165 function v2
    166 v2_sub(v2 a, v2 b)
    167 {
    168 	v2 result = v2_add(a, v2_scale(b, -1.0f));
    169 	return result;
    170 }
    171 
    172 function v2
    173 v2_mul(v2 a, v2 b)
    174 {
    175 	v2 result;
    176 	result.x = a.x * b.x;
    177 	result.y = a.y * b.y;
    178 	return result;
    179 }
    180 
    181 function v2
    182 v2_div(v2 a, v2 b)
    183 {
    184 	v2 result;
    185 	result.x = a.x / b.x;
    186 	result.y = a.y / b.y;
    187 	return result;
    188 }
    189 
    190 function v2
    191 v2_floor(v2 a)
    192 {
    193 	v2 result;
    194 	result.x = (f32)((i32)a.x);
    195 	result.y = (f32)((i32)a.y);
    196 	return result;
    197 }
    198 
    199 function f32
    200 v2_magnitude_squared(v2 a)
    201 {
    202 	f32 result = a.x * a.x + a.y * a.y;
    203 	return result;
    204 }
    205 
    206 function f32
    207 v2_magnitude(v2 a)
    208 {
    209 	f32 result = sqrt_f32(a.x * a.x + a.y * a.y);
    210 	return result;
    211 }
    212 
    213 function v3
    214 cross(v3 a, v3 b)
    215 {
    216 	v3 result;
    217 	result.x = a.y * b.z - a.z * b.y;
    218 	result.y = a.z * b.x - a.x * b.z;
    219 	result.z = a.x * b.y - a.y * b.x;
    220 	return result;
    221 }
    222 
    223 function v3
    224 v3_from_f32_array(f32 v[3])
    225 {
    226 	v3 result;
    227 	result.E[0] = v[0];
    228 	result.E[1] = v[1];
    229 	result.E[2] = v[2];
    230 	return result;
    231 }
    232 
    233 function v3
    234 v3_abs(v3 a)
    235 {
    236 	v3 result;
    237 	result.x = ABS(a.x);
    238 	result.y = ABS(a.y);
    239 	result.z = ABS(a.z);
    240 	return result;
    241 }
    242 
    243 function v3
    244 v3_scale(v3 a, f32 scale)
    245 {
    246 	v3 result;
    247 	result.x = scale * a.x;
    248 	result.y = scale * a.y;
    249 	result.z = scale * a.z;
    250 	return result;
    251 }
    252 
    253 function v3
    254 v3_add(v3 a, v3 b)
    255 {
    256 	v3 result;
    257 	result.x = a.x + b.x;
    258 	result.y = a.y + b.y;
    259 	result.z = a.z + b.z;
    260 	return result;
    261 }
    262 
    263 function v3
    264 v3_sub(v3 a, v3 b)
    265 {
    266 	v3 result = v3_add(a, v3_scale(b, -1.0f));
    267 	return result;
    268 }
    269 
    270 function v3
    271 v3_div(v3 a, v3 b)
    272 {
    273 	v3 result;
    274 	result.x = a.x / b.x;
    275 	result.y = a.y / b.y;
    276 	result.z = a.z / b.z;
    277 	return result;
    278 }
    279 
    280 function f32
    281 v3_dot(v3 a, v3 b)
    282 {
    283 	f32 result = a.x * b.x + a.y * b.y + a.z * b.z;
    284 	return result;
    285 }
    286 
    287 function f32
    288 v3_magnitude_squared(v3 a)
    289 {
    290 	f32 result = v3_dot(a, a);
    291 	return result;
    292 }
    293 
    294 function f32
    295 v3_magnitude(v3 a)
    296 {
    297 	f32 result = sqrt_f32(v3_dot(a, a));
    298 	return result;
    299 }
    300 
    301 function v3
    302 v3_normalize(v3 a)
    303 {
    304 	v3 result = v3_scale(a, 1.0f / v3_magnitude(a));
    305 	return result;
    306 }
    307 
    308 function v4
    309 v4_scale(v4 a, f32 scale)
    310 {
    311 	v4 result;
    312 	result.x = scale * a.x;
    313 	result.y = scale * a.y;
    314 	result.z = scale * a.z;
    315 	result.w = scale * a.w;
    316 	return result;
    317 }
    318 
    319 function v4
    320 v4_add(v4 a, v4 b)
    321 {
    322 	v4 result;
    323 	result.x = a.x + b.x;
    324 	result.y = a.y + b.y;
    325 	result.z = a.z + b.z;
    326 	result.w = a.w + b.w;
    327 	return result;
    328 }
    329 
    330 function v4
    331 v4_sub(v4 a, v4 b)
    332 {
    333 	v4 result = v4_add(a, v4_scale(b, -1));
    334 	return result;
    335 }
    336 
    337 function f32
    338 v4_dot(v4 a, v4 b)
    339 {
    340 	f32 result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
    341 	return result;
    342 }
    343 
    344 function v4
    345 v4_lerp(v4 a, v4 b, f32 t)
    346 {
    347 	v4 result = v4_add(a, v4_scale(v4_sub(b, a), t));
    348 	return result;
    349 }
    350 
    351 function m4
    352 m4_identity(void)
    353 {
    354 	m4 result;
    355 	result.c[0] = (v4){{1, 0, 0, 0}};
    356 	result.c[1] = (v4){{0, 1, 0, 0}};
    357 	result.c[2] = (v4){{0, 0, 1, 0}};
    358 	result.c[3] = (v4){{0, 0, 0, 1}};
    359 	return result;
    360 }
    361 
    362 function v4
    363 m4_row(m4 a, u32 row)
    364 {
    365 	v4 result;
    366 	result.E[0] = a.c[0].E[row];
    367 	result.E[1] = a.c[1].E[row];
    368 	result.E[2] = a.c[2].E[row];
    369 	result.E[3] = a.c[3].E[row];
    370 	return result;
    371 }
    372 
    373 function m4
    374 m4_mul(m4 a, m4 b)
    375 {
    376 	m4 result;
    377 	for (u32 i = 0; i < 4; i++) {
    378 		for (u32 j = 0; j < 4; j++) {
    379 			result.c[i].E[j] = v4_dot(m4_row(a, j), b.c[i]);
    380 		}
    381 	}
    382 	return result;
    383 }
    384 
    385 /* NOTE(rnp): based on:
    386  * https://web.archive.org/web/20131215123403/ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf
    387  * TODO(rnp): redo with SIMD as given in the link (but need to rewrite for column-major)
    388  */
    389 function m4
    390 m4_inverse(m4 m)
    391 {
    392 	m4 result;
    393 	result.E[ 0] =  m.E[5] * m.E[10] * m.E[15] - m.E[5] * m.E[11] * m.E[14] - m.E[9] * m.E[6] * m.E[15] + m.E[9] * m.E[7] * m.E[14] + m.E[13] * m.E[6] * m.E[11] - m.E[13] * m.E[7] * m.E[10];
    394 	result.E[ 4] = -m.E[4] * m.E[10] * m.E[15] + m.E[4] * m.E[11] * m.E[14] + m.E[8] * m.E[6] * m.E[15] - m.E[8] * m.E[7] * m.E[14] - m.E[12] * m.E[6] * m.E[11] + m.E[12] * m.E[7] * m.E[10];
    395 	result.E[ 8] =  m.E[4] * m.E[ 9] * m.E[15] - m.E[4] * m.E[11] * m.E[13] - m.E[8] * m.E[5] * m.E[15] + m.E[8] * m.E[7] * m.E[13] + m.E[12] * m.E[5] * m.E[11] - m.E[12] * m.E[7] * m.E[ 9];
    396 	result.E[12] = -m.E[4] * m.E[ 9] * m.E[14] + m.E[4] * m.E[10] * m.E[13] + m.E[8] * m.E[5] * m.E[14] - m.E[8] * m.E[6] * m.E[13] - m.E[12] * m.E[5] * m.E[10] + m.E[12] * m.E[6] * m.E[ 9];
    397 	result.E[ 1] = -m.E[1] * m.E[10] * m.E[15] + m.E[1] * m.E[11] * m.E[14] + m.E[9] * m.E[2] * m.E[15] - m.E[9] * m.E[3] * m.E[14] - m.E[13] * m.E[2] * m.E[11] + m.E[13] * m.E[3] * m.E[10];
    398 	result.E[ 5] =  m.E[0] * m.E[10] * m.E[15] - m.E[0] * m.E[11] * m.E[14] - m.E[8] * m.E[2] * m.E[15] + m.E[8] * m.E[3] * m.E[14] + m.E[12] * m.E[2] * m.E[11] - m.E[12] * m.E[3] * m.E[10];
    399 	result.E[ 9] = -m.E[0] * m.E[ 9] * m.E[15] + m.E[0] * m.E[11] * m.E[13] + m.E[8] * m.E[1] * m.E[15] - m.E[8] * m.E[3] * m.E[13] - m.E[12] * m.E[1] * m.E[11] + m.E[12] * m.E[3] * m.E[ 9];
    400 	result.E[13] =  m.E[0] * m.E[ 9] * m.E[14] - m.E[0] * m.E[10] * m.E[13] - m.E[8] * m.E[1] * m.E[14] + m.E[8] * m.E[2] * m.E[13] + m.E[12] * m.E[1] * m.E[10] - m.E[12] * m.E[2] * m.E[ 9];
    401 	result.E[ 2] =  m.E[1] * m.E[ 6] * m.E[15] - m.E[1] * m.E[ 7] * m.E[14] - m.E[5] * m.E[2] * m.E[15] + m.E[5] * m.E[3] * m.E[14] + m.E[13] * m.E[2] * m.E[ 7] - m.E[13] * m.E[3] * m.E[ 6];
    402 	result.E[ 6] = -m.E[0] * m.E[ 6] * m.E[15] + m.E[0] * m.E[ 7] * m.E[14] + m.E[4] * m.E[2] * m.E[15] - m.E[4] * m.E[3] * m.E[14] - m.E[12] * m.E[2] * m.E[ 7] + m.E[12] * m.E[3] * m.E[ 6];
    403 	result.E[10] =  m.E[0] * m.E[ 5] * m.E[15] - m.E[0] * m.E[ 7] * m.E[13] - m.E[4] * m.E[1] * m.E[15] + m.E[4] * m.E[3] * m.E[13] + m.E[12] * m.E[1] * m.E[ 7] - m.E[12] * m.E[3] * m.E[ 5];
    404 	result.E[14] = -m.E[0] * m.E[ 5] * m.E[14] + m.E[0] * m.E[ 6] * m.E[13] + m.E[4] * m.E[1] * m.E[14] - m.E[4] * m.E[2] * m.E[13] - m.E[12] * m.E[1] * m.E[ 6] + m.E[12] * m.E[2] * m.E[ 5];
    405 	result.E[ 3] = -m.E[1] * m.E[ 6] * m.E[11] + m.E[1] * m.E[ 7] * m.E[10] + m.E[5] * m.E[2] * m.E[11] - m.E[5] * m.E[3] * m.E[10] - m.E[ 9] * m.E[2] * m.E[ 7] + m.E[ 9] * m.E[3] * m.E[ 6];
    406 	result.E[ 7] =  m.E[0] * m.E[ 6] * m.E[11] - m.E[0] * m.E[ 7] * m.E[10] - m.E[4] * m.E[2] * m.E[11] + m.E[4] * m.E[3] * m.E[10] + m.E[ 8] * m.E[2] * m.E[ 7] - m.E[ 8] * m.E[3] * m.E[ 6];
    407 	result.E[11] = -m.E[0] * m.E[ 5] * m.E[11] + m.E[0] * m.E[ 7] * m.E[ 9] + m.E[4] * m.E[1] * m.E[11] - m.E[4] * m.E[3] * m.E[ 9] - m.E[ 8] * m.E[1] * m.E[ 7] + m.E[ 8] * m.E[3] * m.E[ 5];
    408 	result.E[15] =  m.E[0] * m.E[ 5] * m.E[10] - m.E[0] * m.E[ 6] * m.E[ 9] - m.E[4] * m.E[1] * m.E[10] + m.E[4] * m.E[2] * m.E[ 9] + m.E[ 8] * m.E[1] * m.E[ 6] - m.E[ 8] * m.E[2] * m.E[ 5];
    409 
    410 	f32 determinant = m.E[0] * result.E[0] + m.E[1] * result.E[4] + m.E[2] * result.E[8] + m.E[3] * result.E[12];
    411 	determinant = 1.0f / determinant;
    412 	for(i32 i = 0; i < 16; i++)
    413 		result.E[i] *= determinant;
    414 	return result;
    415 }
    416 
    417 function m4
    418 m4_translation(v3 delta)
    419 {
    420 	m4 result;
    421 	result.c[0] = (v4){{1, 0, 0, 0}};
    422 	result.c[1] = (v4){{0, 1, 0, 0}};
    423 	result.c[2] = (v4){{0, 0, 1, 0}};
    424 	result.c[3] = (v4){{delta.x, delta.y, delta.z, 1}};
    425 	return result;
    426 }
    427 
    428 function m4
    429 m4_scale(v3 scale)
    430 {
    431 	m4 result;
    432 	result.c[0] = (v4){{scale.x, 0,       0,       0}};
    433 	result.c[1] = (v4){{0,       scale.y, 0,       0}};
    434 	result.c[2] = (v4){{0,       0,       scale.z, 0}};
    435 	result.c[3] = (v4){{0,       0,       0,       1}};
    436 	return result;
    437 }
    438 
    439 function m4
    440 m4_rotation_about_z(f32 turns)
    441 {
    442 	f32 sa = sin_f32(turns * 2 * PI);
    443 	f32 ca = cos_f32(turns * 2 * PI);
    444 	m4 result;
    445 	result.c[0] = (v4){{ca, -sa, 0, 0}};
    446 	result.c[1] = (v4){{sa,  ca, 0, 0}};
    447 	result.c[2] = (v4){{0,    0, 1, 0}};
    448 	result.c[3] = (v4){{0,    0, 0, 1}};
    449 	return result;
    450 }
    451 
    452 function m4
    453 m4_rotation_about_y(f32 turns)
    454 {
    455 	f32 sa = sin_f32(turns * 2 * PI);
    456 	f32 ca = cos_f32(turns * 2 * PI);
    457 	m4 result;
    458 	result.c[0] = (v4){{ca, 0, -sa, 0}};
    459 	result.c[1] = (v4){{0,  1,  0,  0}};
    460 	result.c[2] = (v4){{sa, 0,  ca, 0}};
    461 	result.c[3] = (v4){{0,  0,  0,  1}};
    462 	return result;
    463 }
    464 
    465 function m4
    466 y_aligned_volume_transform(v3 extent, v3 translation, f32 rotation_turns)
    467 {
    468 	m4 T = m4_translation(translation);
    469 	m4 R = m4_rotation_about_y(rotation_turns);
    470 	m4 S = m4_scale(extent);
    471 	m4 result = m4_mul(T, m4_mul(R, S));
    472 	return result;
    473 }
    474 
    475 function v4
    476 m4_mul_v4(m4 a, v4 v)
    477 {
    478 	v4 result;
    479 	result.x = v4_dot(m4_row(a, 0), v);
    480 	result.y = v4_dot(m4_row(a, 1), v);
    481 	result.z = v4_dot(m4_row(a, 2), v);
    482 	result.w = v4_dot(m4_row(a, 3), v);
    483 	return result;
    484 }
    485 
    486 function m4
    487 orthographic_projection(f32 n, f32 f, f32 t, f32 r)
    488 {
    489 	m4 result;
    490 	f32 a = -2 / (f - n);
    491 	f32 b = - (f + n) / (f - n);
    492 	result.c[0] = (v4){{1 / r, 0,     0,  0}};
    493 	result.c[1] = (v4){{0,     1 / t, 0,  0}};
    494 	result.c[2] = (v4){{0,     0,     a,  0}};
    495 	result.c[3] = (v4){{0,     0,     b,  1}};
    496 	return result;
    497 }
    498 
    499 function m4
    500 perspective_projection(f32 n, f32 f, f32 fov, f32 aspect)
    501 {
    502 	m4 result;
    503 	f32 t = tan_f32(fov / 2.0f);
    504 	f32 r = t * aspect;
    505 	f32 a = -(f + n) / (f - n);
    506 	f32 b = -2 * f * n / (f - n);
    507 	result.c[0] = (v4){{1 / r, 0,     0,  0}};
    508 	result.c[1] = (v4){{0,     1 / t, 0,  0}};
    509 	result.c[2] = (v4){{0,     0,     a, -1}};
    510 	result.c[3] = (v4){{0,     0,     b,  0}};
    511 	return result;
    512 }
    513 
    514 function m4
    515 camera_look_at(v3 camera, v3 point)
    516 {
    517 	v3 orthogonal = {{0, 1.0f, 0}};
    518 	v3 normal     = v3_normalize(v3_sub(camera, point));
    519 	v3 right      = cross(orthogonal, normal);
    520 	v3 up         = cross(normal,     right);
    521 
    522 	v3 translate;
    523 	camera      = v3_sub((v3){0}, camera);
    524 	translate.x = v3_dot(camera, right);
    525 	translate.y = v3_dot(camera, up);
    526 	translate.z = v3_dot(camera, normal);
    527 
    528 	m4 result;
    529 	result.c[0] = (v4){{right.x,     up.x,        normal.x,    0}};
    530 	result.c[1] = (v4){{right.y,     up.y,        normal.y,    0}};
    531 	result.c[2] = (v4){{right.z,     up.z,        normal.z,    0}};
    532 	result.c[3] = (v4){{translate.x, translate.y, translate.z, 1}};
    533 	return result;
    534 }
    535 
    536 /* NOTE(rnp): adapted from "Essential Mathematics for Games and Interactive Applications" (Verth, Bishop) */
    537 function f32
    538 obb_raycast(m4 obb_orientation, v3 obb_size, v3 obb_center, ray r)
    539 {
    540 	v3 p = v3_sub(obb_center, r.origin);
    541 	v3 X = obb_orientation.c[0].xyz;
    542 	v3 Y = obb_orientation.c[1].xyz;
    543 	v3 Z = obb_orientation.c[2].xyz;
    544 
    545 	/* NOTE(rnp): projects direction vector onto OBB axis */
    546 	v3 f;
    547 	f.x = v3_dot(X, r.direction);
    548 	f.y = v3_dot(Y, r.direction);
    549 	f.z = v3_dot(Z, r.direction);
    550 
    551 	/* NOTE(rnp): projects relative vector onto OBB axis */
    552 	v3 e;
    553 	e.x = v3_dot(X, p);
    554 	e.y = v3_dot(Y, p);
    555 	e.z = v3_dot(Z, p);
    556 
    557 	f32 result = 0;
    558 	f32 t[6] = {0};
    559 	for (i32 i = 0; i < 3; i++) {
    560 		if (f32_cmp(f.E[i], 0)) {
    561 			if (-e.E[i] - obb_size.E[i] > 0 || -e.E[i] + obb_size.E[i] < 0)
    562 				result = -1.0f;
    563 			f.E[i] = F32_EPSILON;
    564 		}
    565 		t[i * 2 + 0] = (e.E[i] + obb_size.E[i]) / f.E[i];
    566 		t[i * 2 + 1] = (e.E[i] - obb_size.E[i]) / f.E[i];
    567 	}
    568 
    569 	if (result != -1) {
    570 		f32 tmin = MAX(MAX(MIN(t[0], t[1]), MIN(t[2], t[3])), MIN(t[4], t[5]));
    571 		f32 tmax = MIN(MIN(MAX(t[0], t[1]), MAX(t[2], t[3])), MAX(t[4], t[5]));
    572 		if (tmax >= 0 && tmin <= tmax) {
    573 			result = tmin > 0 ? tmin : tmax;
    574 		} else {
    575 			result = -1;
    576 		}
    577 	}
    578 
    579 	return result;
    580 }
    581 
    582 function f32
    583 complex_filter_first_moment(v2 *filter, i32 length, f32 sampling_frequency)
    584 {
    585 	f32 n = 0, d = 0;
    586 	for (i32 i = 0; i < length; i++) {
    587 		f32 t = v2_magnitude_squared(filter[i]);
    588 		n += (f32)i * t;
    589 		d += t;
    590 	}
    591 	f32 result = n / d / sampling_frequency;
    592 	return result;
    593 }
    594 
    595 function f32
    596 real_filter_first_moment(f32 *filter, i32 length, f32 sampling_frequency)
    597 {
    598 	f32 n = 0, d = 0;
    599 	for (i32 i = 0; i < length; i++) {
    600 		f32 t = filter[i] * filter[i];
    601 		n += (f32)i * t;
    602 		d += t;
    603 	}
    604 	f32 result = n / d / sampling_frequency;
    605 	return result;
    606 }
    607 
    608 function f32
    609 tukey_window(f32 t, f32 tapering)
    610 {
    611 	f32 r = tapering;
    612 	f32 result = 1;
    613 	if (t < r / 2)      result = 0.5f * (1 + cos_f32(2 * PI * (t - r / 2)     / r));
    614 	if (t >= 1 - r / 2) result = 0.5f * (1 + cos_f32(2 * PI * (t - 1 + r / 2) / r));
    615 	return result;
    616 }
    617 
    618 /* NOTE(rnp): adapted from "Discrete Time Signal Processing" (Oppenheim) */
    619 function f32 *
    620 kaiser_low_pass_filter(Arena *arena, f32 cutoff_frequency, f32 sampling_frequency, f32 beta, i32 length)
    621 {
    622 	f32 *result = push_array(arena, f32, length);
    623 	f32 wc      = 2 * PI * cutoff_frequency / sampling_frequency;
    624 	f32 a       = (f32)length / 2.0f;
    625 	f32 pi_i0_b = PI * (f32)cephes_i0(beta);
    626 
    627 	for (i32 n = 0; n < length; n++) {
    628 		f32 t       = (f32)n - a;
    629 		f32 impulse = !f32_cmp(t, 0) ? sin_f32(wc * t) / t : wc;
    630 		t           = t / a;
    631 		f32 window  = (f32)cephes_i0(beta * sqrt_f32(1 - t * t)) / pi_i0_b;
    632 		result[n]   = impulse * window;
    633 	}
    634 
    635 	return result;
    636 }
    637 
    638 function f32 *
    639 rf_chirp(Arena *arena, f32 min_frequency, f32 max_frequency, f32 sampling_frequency,
    640          i32 length, b32 reverse)
    641 {
    642 	f32 *result = push_array(arena, f32, length);
    643 	for (i32 i = 0; i < length; i++) {
    644 		i32 index = reverse? length - 1 - i : i;
    645 		f32 fc    = min_frequency + (f32)i * (max_frequency - min_frequency) / (2 * (f32)length);
    646 		f32 arg   = 2 * PI * fc * (f32)i / sampling_frequency;
    647 		result[index] = sin_f32(arg) * tukey_window((f32)i / (f32)length, 0.2f);
    648 	}
    649 	return result;
    650 }
    651 
    652 function v2 *
    653 baseband_chirp(Arena *arena, f32 min_frequency, f32 max_frequency, f32 sampling_frequency,
    654                i32 length, b32 reverse, f32 scale)
    655 {
    656 	v2 *result    = push_array(arena, v2, length);
    657 	f32 conjugate = reverse ? -1 : 1;
    658 	for (i32 i = 0; i < length; i++) {
    659 		i32 index = reverse? length - 1 - i : i;
    660 		f32 fc    = min_frequency + (f32)i * (max_frequency - min_frequency) / (2 * (f32)length);
    661 		f32 arg   = 2 * PI * fc * (f32)i / sampling_frequency;
    662 		v2 sample = {{scale * cos_f32(arg), conjugate * scale * sin_f32(arg)}};
    663 		result[index] = v2_scale(sample, tukey_window((f32)i / (f32)length, 0.2f));
    664 	}
    665 	return result;
    666 }
    667 
    668 function v4
    669 hsv_to_rgb(v4 hsv)
    670 {
    671 	/* f(k(n))   = V - V*S*max(0, min(k, min(4 - k, 1)))
    672 	 * k(n)      = fmod((n + H * 6), 6)
    673 	 * (R, G, B) = (f(n = 5), f(n = 3), f(n = 1))
    674 	 */
    675 	alignas(16) f32 nval[4] = {5.0f, 3.0f, 1.0f, 0.0f};
    676 	f32x4 n   = load_f32x4(nval);
    677 	f32x4 H   = dup_f32x4(hsv.x);
    678 	f32x4 S   = dup_f32x4(hsv.y);
    679 	f32x4 V   = dup_f32x4(hsv.z);
    680 	f32x4 six = dup_f32x4(6);
    681 
    682 	f32x4 t   = add_f32x4(n, mul_f32x4(six, H));
    683 	f32x4 rem = floor_f32x4(div_f32x4(t, six));
    684 	f32x4 k   = sub_f32x4(t, mul_f32x4(rem, six));
    685 
    686 	t = min_f32x4(sub_f32x4(dup_f32x4(4), k), dup_f32x4(1));
    687 	t = max_f32x4(dup_f32x4(0), min_f32x4(k, t));
    688 	t = mul_f32x4(t, mul_f32x4(S, V));
    689 
    690 	v4 rgba;
    691 	store_f32x4(rgba.E, sub_f32x4(V, t));
    692 	rgba.a = hsv.a;
    693 	return rgba;
    694 }
    695 
    696 function f32
    697 ease_in_out_cubic(f32 t)
    698 {
    699 	f32 result;
    700 	if (t < 0.5f) {
    701 		result = 4.0f * t * t * t;
    702 	} else {
    703 		t      = -2.0f * t + 2.0f;
    704 		result =  1.0f - t * t * t / 2.0f;
    705 	}
    706 	return result;
    707 }
    708 
    709 function f32
    710 ease_in_out_quartic(f32 t)
    711 {
    712 	f32 result;
    713 	if (t < 0.5f) {
    714 		result = 8.0f * t * t * t * t;
    715 	} else {
    716 		t      = -2.0f * t + 2.0f;
    717 		result =  1.0f - t * t * t * t / 2.0f;
    718 	}
    719 	return result;
    720 }