math.c - ogl_beamforming - Ultrasound Beamforming Implemented with OpenGL

math.c (15788B)
      1 function void
      2 fill_kronecker_sub_matrix(i32 *out, i32 out_stride, i32 scale, i32 *b, uv2 b_dim)
      3 {
      4 	f32x4 vscale = dup_f32x4(scale);
      5 	for (u32 i = 0; i < b_dim.y; i++) {
      6 		for (u32 j = 0; j < b_dim.x; j += 4, b += 4) {
      7 			f32x4 vb = cvt_i32x4_f32x4(load_i32x4(b));
      8 			store_i32x4(cvt_f32x4_i32x4(mul_f32x4(vscale, vb)), out + j);
      9 		}
     10 		out += out_stride;
     11 	}
     12 }
     13 
     14 /* NOTE: this won't check for valid space/etc and assumes row major order */
     15 function void
     16 kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim)
     17 {
     18 	uv2 out_dim = {.x = a_dim.x * b_dim.x, .y = a_dim.y * b_dim.y};
     19 	ASSERT(out_dim.y % 4 == 0);
     20 	for (u32 i = 0; i < a_dim.y; i++) {
     21 		i32 *vout = out;
     22 		for (u32 j = 0; j < a_dim.x; j++, a++) {
     23 			fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim);
     24 			vout += b_dim.y;
     25 		}
     26 		out += out_dim.y * b_dim.x;
     27 	}
     28 }
     29 
     30 /* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
     31 function i32 *
     32 make_hadamard_transpose(Arena *a, u32 dim)
     33 {
     34 	read_only local_persist	i32 hadamard_12_12_transpose[] = {
     35 		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     36 		1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1,
     37 		1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1, -1,
     38 		1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,
     39 		1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,
     40 		1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,  1,
     41 		1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1, -1,
     42 		1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1, -1,
     43 		1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1, -1,
     44 		1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,  1,
     45 		1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1, -1,
     46 		1, -1,  1, -1, -1, -1,  1,  1,  1, -1,  1, -1,
     47 	};
     48 
     49 	read_only local_persist i32 hadamard_20_20_transpose[] = {
     50 		1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     51 		1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1,
     52 		1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1,
     53 		1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,
     54 		1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,
     55 		1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1,
     56 		1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1,
     57 		1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1,
     58 		1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1,
     59 		1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,
     60 		1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1,
     61 		1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,
     62 		1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1,
     63 		1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,
     64 		1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,
     65 		1,  1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,
     66 		1,  1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,
     67 		1, -1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1,
     68 		1, -1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1,
     69 		1,  1, -1, -1,  1,  1, -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1,  1, -1, -1,
     70 	};
     71 
     72 	i32 *result = 0;
     73 
     74 	b32 power_of_2     = ISPOWEROF2(dim);
     75 	b32 multiple_of_12 = dim % 12 == 0;
     76 	b32 multiple_of_20 = dim % 20 == 0;
     77 	iz elements        = dim * dim;
     78 
     79 	u32 base_dim = 0;
     80 	if (power_of_2) {
     81 		base_dim  = dim;
     82 	} else if (multiple_of_20 && ISPOWEROF2(dim / 20)) {
     83 		base_dim  = 20;
     84 		dim      /= 20;
     85 	} else if (multiple_of_12 && ISPOWEROF2(dim / 12)) {
     86 		base_dim  = 12;
     87 		dim      /= 12;
     88 	}
     89 
     90 	if (ISPOWEROF2(dim) && base_dim && arena_capacity(a, i32) >= elements * (1 + (dim != base_dim))) {
     91 		result = push_array(a, i32, elements);
     92 
     93 		Arena tmp = *a;
     94 		i32 *m = dim == base_dim ? result : push_array(&tmp, i32, elements);
     95 
     96 		#define IND(i, j) ((i) * dim + (j))
     97 		m[0] = 1;
     98 		for (u32 k = 1; k < dim; k *= 2) {
     99 			for (u32 i = 0; i < k; i++) {
    100 				for (u32 j = 0; j < k; j++) {
    101 					i32 val = m[IND(i, j)];
    102 					m[IND(i + k, j)]     =  val;
    103 					m[IND(i, j + k)]     =  val;
    104 					m[IND(i + k, j + k)] = -val;
    105 				}
    106 			}
    107 		}
    108 		#undef IND
    109 
    110 		i32 *m2 = 0;
    111 		uv2 m2_dim;
    112 		switch (base_dim) {
    113 		case 12:{ m2 = hadamard_12_12_transpose; m2_dim = (uv2){{12, 12}}; }break;
    114 		case 20:{ m2 = hadamard_20_20_transpose; m2_dim = (uv2){{20, 20}}; }break;
    115 		}
    116 		if (m2) kronecker_product(result, m, (uv2){{dim, dim}}, m2, m2_dim);
    117 	}
    118 
    119 	return result;
    120 }
    121 
    122 function b32
    123 uv2_equal(uv2 a, uv2 b)
    124 {
    125 	return a.x == b.x && a.y == b.y;
    126 }
    127 
    128 function b32
    129 uv3_equal(uv3 a, uv3 b)
    130 {
    131 	return a.x == b.x && a.y == b.y && a.z == b.z;
    132 }
    133 
    134 function v2
    135 clamp_v2_rect(v2 v, Rect r)
    136 {
    137 	v2 result = v;
    138 	result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x);
    139 	result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y);
    140 	return result;
    141 }
    142 
    143 function v2
    144 v2_scale(v2 a, f32 scale)
    145 {
    146 	v2 result;
    147 	result.x = a.x * scale;
    148 	result.y = a.y * scale;
    149 	return result;
    150 }
    151 
    152 function v2
    153 v2_add(v2 a, v2 b)
    154 {
    155 	v2 result;
    156 	result.x = a.x + b.x;
    157 	result.y = a.y + b.y;
    158 	return result;
    159 }
    160 
    161 function v2
    162 v2_sub(v2 a, v2 b)
    163 {
    164 	v2 result = v2_add(a, v2_scale(b, -1.0f));
    165 	return result;
    166 }
    167 
    168 function v2
    169 v2_mul(v2 a, v2 b)
    170 {
    171 	v2 result;
    172 	result.x = a.x * b.x;
    173 	result.y = a.y * b.y;
    174 	return result;
    175 }
    176 
    177 function v2
    178 v2_div(v2 a, v2 b)
    179 {
    180 	v2 result;
    181 	result.x = a.x / b.x;
    182 	result.y = a.y / b.y;
    183 	return result;
    184 }
    185 
    186 function v2
    187 v2_floor(v2 a)
    188 {
    189 	v2 result;
    190 	result.x = (i32)a.x;
    191 	result.y = (i32)a.y;
    192 	return result;
    193 }
    194 
    195 function f32
    196 v2_magnitude(v2 a)
    197 {
    198 	f32 result = sqrt_f32(a.x * a.x + a.y * a.y);
    199 	return result;
    200 }
    201 
    202 
    203 function v3
    204 cross(v3 a, v3 b)
    205 {
    206 	v3 result;
    207 	result.x = a.y * b.z - a.z * b.y;
    208 	result.y = a.z * b.x - a.x * b.z;
    209 	result.z = a.x * b.y - a.y * b.x;
    210 	return result;
    211 }
    212 
    213 function v3
    214 v3_scale(v3 a, f32 scale)
    215 {
    216 	v3 result;
    217 	result.x = scale * a.x;
    218 	result.y = scale * a.y;
    219 	result.z = scale * a.z;
    220 	return result;
    221 }
    222 
    223 function v3
    224 v3_add(v3 a, v3 b)
    225 {
    226 	v3 result;
    227 	result.x = a.x + b.x;
    228 	result.y = a.y + b.y;
    229 	result.z = a.z + b.z;
    230 	return result;
    231 }
    232 
    233 function v3
    234 v3_sub(v3 a, v3 b)
    235 {
    236 	v3 result = v3_add(a, v3_scale(b, -1.0f));
    237 	return result;
    238 }
    239 
    240 function f32
    241 v3_dot(v3 a, v3 b)
    242 {
    243 	f32 result = a.x * b.x + a.y * b.y + a.z * b.z;
    244 	return result;
    245 }
    246 
    247 function f32
    248 v3_magnitude_squared(v3 a)
    249 {
    250 	f32 result = v3_dot(a, a);
    251 	return result;
    252 }
    253 
    254 function f32
    255 v3_magnitude(v3 a)
    256 {
    257 	f32 result = sqrt_f32(v3_dot(a, a));
    258 	return result;
    259 }
    260 
    261 function v3
    262 v3_normalize(v3 a)
    263 {
    264 	v3 result = v3_scale(a, 1.0f / v3_magnitude(a));
    265 	return result;
    266 }
    267 
    268 function uv4
    269 uv4_from_u32_array(u32 v[4])
    270 {
    271 	uv4 result;
    272 	result.E[0] = v[0];
    273 	result.E[1] = v[1];
    274 	result.E[2] = v[2];
    275 	result.E[3] = v[3];
    276 	return result;
    277 }
    278 
    279 function b32
    280 uv4_equal(uv4 a, uv4 b)
    281 {
    282 	return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
    283 }
    284 
    285 function v4
    286 v4_from_f32_array(f32 v[4])
    287 {
    288 	v4 result;
    289 	result.E[0] = v[0];
    290 	result.E[1] = v[1];
    291 	result.E[2] = v[2];
    292 	result.E[3] = v[3];
    293 	return result;
    294 }
    295 
    296 function v4
    297 v4_scale(v4 a, f32 scale)
    298 {
    299 	v4 result;
    300 	result.x = scale * a.x;
    301 	result.y = scale * a.y;
    302 	result.z = scale * a.z;
    303 	result.w = scale * a.w;
    304 	return result;
    305 }
    306 
    307 function v4
    308 v4_add(v4 a, v4 b)
    309 {
    310 	v4 result;
    311 	result.x = a.x + b.x;
    312 	result.y = a.y + b.y;
    313 	result.z = a.z + b.z;
    314 	result.w = a.w + b.w;
    315 	return result;
    316 }
    317 
    318 function v4
    319 v4_sub(v4 a, v4 b)
    320 {
    321 	v4 result = v4_add(a, v4_scale(b, -1));
    322 	return result;
    323 }
    324 
    325 function f32
    326 v4_dot(v4 a, v4 b)
    327 {
    328 	f32 result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
    329 	return result;
    330 }
    331 
    332 function v4
    333 v4_lerp(v4 a, v4 b, f32 t)
    334 {
    335 	v4 result = v4_add(a, v4_scale(v4_sub(b, a), t));
    336 	return result;
    337 }
    338 
    339 function m4
    340 m4_identity(void)
    341 {
    342 	m4 result;
    343 	result.c[0] = (v4){{1, 0, 0, 0}};
    344 	result.c[1] = (v4){{0, 1, 0, 0}};
    345 	result.c[2] = (v4){{0, 0, 1, 0}};
    346 	result.c[3] = (v4){{0, 0, 0, 1}};
    347 	return result;
    348 }
    349 
    350 function v4
    351 m4_row(m4 a, u32 row)
    352 {
    353 	v4 result;
    354 	result.E[0] = a.c[0].E[row];
    355 	result.E[1] = a.c[1].E[row];
    356 	result.E[2] = a.c[2].E[row];
    357 	result.E[3] = a.c[3].E[row];
    358 	return result;
    359 }
    360 
    361 function v4
    362 m4_column(m4 a, u32 column)
    363 {
    364 	v4 result = a.c[column];
    365 	return result;
    366 }
    367 
    368 function m4
    369 m4_mul(m4 a, m4 b)
    370 {
    371 	m4 result;
    372 	for (u32 i = 0; i < countof(result.E); i++) {
    373 		u32 base = i / 4;
    374 		u32 sub  = i % 4;
    375 		v4 v1 = m4_row(a, base);
    376 		v4 v2 = m4_column(b, sub);
    377 		result.E[i] = v4_dot(v1, v2);
    378 	}
    379 	return result;
    380 }
    381 
    382 /* NOTE(rnp): based on:
    383  * https://web.archive.org/web/20131215123403/ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf
    384  * TODO(rnp): redo with SIMD as given in the link (but need to rewrite for column-major)
    385  */
    386 function m4
    387 m4_inverse(m4 m)
    388 {
    389 	m4 result;
    390 	result.E[ 0] =  m.E[5] * m.E[10] * m.E[15] - m.E[5] * m.E[11] * m.E[14] - m.E[9] * m.E[6] * m.E[15] + m.E[9] * m.E[7] * m.E[14] + m.E[13] * m.E[6] * m.E[11] - m.E[13] * m.E[7] * m.E[10];
    391 	result.E[ 4] = -m.E[4] * m.E[10] * m.E[15] + m.E[4] * m.E[11] * m.E[14] + m.E[8] * m.E[6] * m.E[15] - m.E[8] * m.E[7] * m.E[14] - m.E[12] * m.E[6] * m.E[11] + m.E[12] * m.E[7] * m.E[10];
    392 	result.E[ 8] =  m.E[4] * m.E[ 9] * m.E[15] - m.E[4] * m.E[11] * m.E[13] - m.E[8] * m.E[5] * m.E[15] + m.E[8] * m.E[7] * m.E[13] + m.E[12] * m.E[5] * m.E[11] - m.E[12] * m.E[7] * m.E[ 9];
    393 	result.E[12] = -m.E[4] * m.E[ 9] * m.E[14] + m.E[4] * m.E[10] * m.E[13] + m.E[8] * m.E[5] * m.E[14] - m.E[8] * m.E[6] * m.E[13] - m.E[12] * m.E[5] * m.E[10] + m.E[12] * m.E[6] * m.E[ 9];
    394 	result.E[ 1] = -m.E[1] * m.E[10] * m.E[15] + m.E[1] * m.E[11] * m.E[14] + m.E[9] * m.E[2] * m.E[15] - m.E[9] * m.E[3] * m.E[14] - m.E[13] * m.E[2] * m.E[11] + m.E[13] * m.E[3] * m.E[10];
    395 	result.E[ 5] =  m.E[0] * m.E[10] * m.E[15] - m.E[0] * m.E[11] * m.E[14] - m.E[8] * m.E[2] * m.E[15] + m.E[8] * m.E[3] * m.E[14] + m.E[12] * m.E[2] * m.E[11] - m.E[12] * m.E[3] * m.E[10];
    396 	result.E[ 9] = -m.E[0] * m.E[ 9] * m.E[15] + m.E[0] * m.E[11] * m.E[13] + m.E[8] * m.E[1] * m.E[15] - m.E[8] * m.E[3] * m.E[13] - m.E[12] * m.E[1] * m.E[11] + m.E[12] * m.E[3] * m.E[ 9];
    397 	result.E[13] =  m.E[0] * m.E[ 9] * m.E[14] - m.E[0] * m.E[10] * m.E[13] - m.E[8] * m.E[1] * m.E[14] + m.E[8] * m.E[2] * m.E[13] + m.E[12] * m.E[1] * m.E[10] - m.E[12] * m.E[2] * m.E[ 9];
    398 	result.E[ 2] =  m.E[1] * m.E[ 6] * m.E[15] - m.E[1] * m.E[ 7] * m.E[14] - m.E[5] * m.E[2] * m.E[15] + m.E[5] * m.E[3] * m.E[14] + m.E[13] * m.E[2] * m.E[ 7] - m.E[13] * m.E[3] * m.E[ 6];
    399 	result.E[ 6] = -m.E[0] * m.E[ 6] * m.E[15] + m.E[0] * m.E[ 7] * m.E[14] + m.E[4] * m.E[2] * m.E[15] - m.E[4] * m.E[3] * m.E[14] - m.E[12] * m.E[2] * m.E[ 7] + m.E[12] * m.E[3] * m.E[ 6];
    400 	result.E[10] =  m.E[0] * m.E[ 5] * m.E[15] - m.E[0] * m.E[ 7] * m.E[13] - m.E[4] * m.E[1] * m.E[15] + m.E[4] * m.E[3] * m.E[13] + m.E[12] * m.E[1] * m.E[ 7] - m.E[12] * m.E[3] * m.E[ 5];
    401 	result.E[14] = -m.E[0] * m.E[ 5] * m.E[14] + m.E[0] * m.E[ 6] * m.E[13] + m.E[4] * m.E[1] * m.E[14] - m.E[4] * m.E[2] * m.E[13] - m.E[12] * m.E[1] * m.E[ 6] + m.E[12] * m.E[2] * m.E[ 5];
    402 	result.E[ 3] = -m.E[1] * m.E[ 6] * m.E[11] + m.E[1] * m.E[ 7] * m.E[10] + m.E[5] * m.E[2] * m.E[11] - m.E[5] * m.E[3] * m.E[10] - m.E[ 9] * m.E[2] * m.E[ 7] + m.E[ 9] * m.E[3] * m.E[ 6];
    403 	result.E[ 7] =  m.E[0] * m.E[ 6] * m.E[11] - m.E[0] * m.E[ 7] * m.E[10] - m.E[4] * m.E[2] * m.E[11] + m.E[4] * m.E[3] * m.E[10] + m.E[ 8] * m.E[2] * m.E[ 7] - m.E[ 8] * m.E[3] * m.E[ 6];
    404 	result.E[11] = -m.E[0] * m.E[ 5] * m.E[11] + m.E[0] * m.E[ 7] * m.E[ 9] + m.E[4] * m.E[1] * m.E[11] - m.E[4] * m.E[3] * m.E[ 9] - m.E[ 8] * m.E[1] * m.E[ 7] + m.E[ 8] * m.E[3] * m.E[ 5];
    405 	result.E[15] =  m.E[0] * m.E[ 5] * m.E[10] - m.E[0] * m.E[ 6] * m.E[ 9] - m.E[4] * m.E[1] * m.E[10] + m.E[4] * m.E[2] * m.E[ 9] + m.E[ 8] * m.E[1] * m.E[ 6] - m.E[ 8] * m.E[2] * m.E[ 5];
    406 
    407 	f32 determinant = m.E[0] * result.E[0] + m.E[1] * result.E[4] + m.E[2] * result.E[8] + m.E[3] * result.E[12];
    408 	determinant = 1.0f / determinant;
    409 	for(i32 i = 0; i < 16; i++)
    410 		result.E[i] *= determinant;
    411 	return result;
    412 }
    413 
    414 function m4
    415 m4_translation(v3 delta)
    416 {
    417 	m4 result;
    418 	result.c[0] = (v4){{1, 0, 0, delta.x}};
    419 	result.c[1] = (v4){{0, 1, 0, delta.y}};
    420 	result.c[2] = (v4){{0, 0, 1, delta.z}};
    421 	result.c[3] = (v4){{0, 0, 0, 1}};
    422 	return result;
    423 }
    424 
    425 function m4
    426 m4_scale(v3 scale)
    427 {
    428 	m4 result;
    429 	result.c[0] = (v4){{scale.x, 0,       0,       0}};
    430 	result.c[1] = (v4){{0,       scale.y, 0,       0}};
    431 	result.c[2] = (v4){{0,       0,       scale.z, 0}};
    432 	result.c[3] = (v4){{0,       0,       0,       1}};
    433 	return result;
    434 }
    435 
    436 function m4
    437 m4_rotation_about_y(f32 turns)
    438 {
    439 	f32 sa = sin_f32(turns * 2 * PI);
    440 	f32 ca = cos_f32(turns * 2 * PI);
    441 	m4 result;
    442 	result.c[0] = (v4){{ca, 0, -sa, 0}};
    443 	result.c[1] = (v4){{0,  1,  0,  0}};
    444 	result.c[2] = (v4){{sa, 0,  ca, 0}};
    445 	result.c[3] = (v4){{0,  0,  0,  1}};
    446 	return result;
    447 }
    448 
    449 function m4
    450 y_aligned_volume_transform(v3 extent, v3 translation, f32 rotation_turns)
    451 {
    452 	m4 S = m4_scale(extent);
    453 	m4 R = m4_rotation_about_y(rotation_turns);
    454 	m4 T = m4_translation(translation);
    455 	m4 result = m4_mul(m4_mul(R, S), T);
    456 	return result;
    457 }
    458 
    459 function v4
    460 m4_mul_v4(m4 a, v4 v)
    461 {
    462 	v4 result;
    463 	result.x = v4_dot(m4_row(a, 0), v);
    464 	result.y = v4_dot(m4_row(a, 1), v);
    465 	result.z = v4_dot(m4_row(a, 2), v);
    466 	result.w = v4_dot(m4_row(a, 3), v);
    467 	return result;
    468 }
    469 
    470 function m4
    471 orthographic_projection(f32 n, f32 f, f32 t, f32 r)
    472 {
    473 	m4 result;
    474 	f32 a = -2 / (f - n);
    475 	f32 b = - (f + n) / (f - n);
    476 	result.c[0] = (v4){{1 / r, 0,     0,  0}};
    477 	result.c[1] = (v4){{0,     1 / t, 0,  0}};
    478 	result.c[2] = (v4){{0,     0,     a,  0}};
    479 	result.c[3] = (v4){{0,     0,     b,  1}};
    480 	return result;
    481 }
    482 
    483 function m4
    484 perspective_projection(f32 n, f32 f, f32 fov, f32 aspect)
    485 {
    486 	m4 result;
    487 	f32 t = tan_f32(fov / 2.0f);
    488 	f32 r = t * aspect;
    489 	f32 a = -(f + n) / (f - n);
    490 	f32 b = -2 * f * n / (f - n);
    491 	result.c[0] = (v4){{1 / r, 0,     0,  0}};
    492 	result.c[1] = (v4){{0,     1 / t, 0,  0}};
    493 	result.c[2] = (v4){{0,     0,     a, -1}};
    494 	result.c[3] = (v4){{0,     0,     b,  0}};
    495 	return result;
    496 }
    497 
    498 function m4
    499 camera_look_at(v3 camera, v3 point)
    500 {
    501 	v3 orthogonal = {{0, 1.0f, 0}};
    502 	v3 normal     = v3_normalize(v3_sub(camera, point));
    503 	v3 right      = cross(orthogonal, normal);
    504 	v3 up         = cross(normal,     right);
    505 
    506 	v3 translate;
    507 	camera      = v3_sub((v3){0}, camera);
    508 	translate.x = v3_dot(camera, right);
    509 	translate.y = v3_dot(camera, up);
    510 	translate.z = v3_dot(camera, normal);
    511 
    512 	m4 result;
    513 	result.c[0] = (v4){{right.x,     up.x,        normal.x,    0}};
    514 	result.c[1] = (v4){{right.y,     up.y,        normal.y,    0}};
    515 	result.c[2] = (v4){{right.z,     up.z,        normal.z,    0}};
    516 	result.c[3] = (v4){{translate.x, translate.y, translate.z, 1}};
    517 	return result;
    518 }
    519 
    520 /* NOTE(rnp): adapted from "Essential Mathematics for Games and Interactive Applications" (Verth, Bishop) */
    521 function f32
    522 obb_raycast(m4 obb_orientation, v3 obb_size, v3 obb_center, ray ray)
    523 {
    524 	v3 p = v3_sub(obb_center, ray.origin);
    525 	v3 X = obb_orientation.c[0].xyz;
    526 	v3 Y = obb_orientation.c[1].xyz;
    527 	v3 Z = obb_orientation.c[2].xyz;
    528 
    529 	/* NOTE(rnp): projects direction vector onto OBB axis */
    530 	v3 f;
    531 	f.x = v3_dot(X, ray.direction);
    532 	f.y = v3_dot(Y, ray.direction);
    533 	f.z = v3_dot(Z, ray.direction);
    534 
    535 	/* NOTE(rnp): projects relative vector onto OBB axis */
    536 	v3 e;
    537 	e.x = v3_dot(X, p);
    538 	e.y = v3_dot(Y, p);
    539 	e.z = v3_dot(Z, p);
    540 
    541 	f32 result = 0;
    542 	f32 t[6] = {0};
    543 	for (i32 i = 0; i < 3; i++) {
    544 		if (f32_cmp(f.E[i], 0)) {
    545 			if (-e.E[i] - obb_size.E[i] > 0 || -e.E[i] + obb_size.E[i] < 0)
    546 				result = -1.0f;
    547 			f.E[i] = F32_EPSILON;
    548 		}
    549 		t[i * 2 + 0] = (e.E[i] + obb_size.E[i]) / f.E[i];
    550 		t[i * 2 + 1] = (e.E[i] - obb_size.E[i]) / f.E[i];
    551 	}
    552 
    553 	if (result != -1) {
    554 		f32 tmin = MAX(MAX(MIN(t[0], t[1]), MIN(t[2], t[3])), MIN(t[4], t[5]));
    555 		f32 tmax = MIN(MIN(MAX(t[0], t[1]), MAX(t[2], t[3])), MAX(t[4], t[5]));
    556 		if (tmax >= 0 && tmin <= tmax) {
    557 			result = tmin > 0 ? tmin : tmax;
    558 		} else {
    559 			result = -1;
    560 		}
    561 	}
    562 
    563 	return result;
    564 }