ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git

beamformer.c (21396B)


      1 /* See LICENSE for license details. */
      2 
      3 #include "beamformer_internal.h"
      4 
      5 /* NOTE(rnp): magic variables to force discrete GPU usage on laptops with multiple devices */
      6 EXPORT i32 NvOptimusEnablement = 1;
      7 EXPORT i32 AmdPowerXpressRequestHighPerformance = 1;
      8 
      9 #if !BEAMFORMER_DEBUG
     10 #include "beamformer_core.c"
     11 #else
     12 
     13 typedef void beamformer_frame_step_fn(BeamformerInput *);
     14 
     15 #define BEAMFORMER_DEBUG_ENTRY_POINTS \
     16 	X(beamformer_debug_ui_deinit)  \
     17 	X(beamformer_complete_compute) \
     18 	X(beamformer_frame_step)       \
     19 	X(beamformer_rf_upload)        \
     20 
     21 #define X(name) global name ##_fn *name;
     22 BEAMFORMER_DEBUG_ENTRY_POINTS
     23 #undef X
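/* For reference, each X(name) entry above expands into a global function pointer,
 * roughly (illustrative expansion only; the *_fn typedefs for the other entry
 * points are assumed to live in beamformer_internal.h):
 *
 *   global beamformer_frame_step_fn *beamformer_frame_step;
 *
 * beamformer_debug_hot_reload() below repoints these at symbols looked up in the
 * freshly rebuilt library, so the platform layer keeps calling through the same
 * names across a reload. */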
     24 
     25 BEAMFORMER_EXPORT void
     26 beamformer_debug_hot_reload(OSLibrary library, BeamformerInput *input)
     27 {
     28 	BeamformerCtx *ctx = BeamformerContextMemory(input->memory);
     29 
     30 	// TODO(rnp): this will deadlock if live imaging is active
     31 	/* NOTE(rnp): spin until compute thread finishes its work (we will probably
     32 	 * never reload while compute is in progress but just in case). */
     33 	spin_wait(atomic_load_u32(&ctx->upload_worker.awake));
     34 	spin_wait(atomic_load_u32(&ctx->compute_worker.awake));
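	/* Assumed semantics: spin_wait() busy-loops while its argument is nonzero, and
	 * each worker drops its awake flag to 0 just before parking in
	 * os_wait_on_address() and raises it again on wake (see worker_thread_sleep()
	 * below), so this is a best-effort wait for both workers to go idle before the
	 * function pointers are swapped out from under them. */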
     35 
     36 	#define X(name) name = os_lookup_symbol(library, #name);
     37 	BEAMFORMER_DEBUG_ENTRY_POINTS
     38 	#undef X
     39 
     40 	s8 info = beamformer_info("reloaded main executable");
     41 	os_console_log(info.data, info.len);
     42 }
     43 
     44 #endif /* BEAMFORMER_DEBUG */
     45 
     46 function no_return void
     47 fatal(s8 message)
     48 {
     49 	os_fatal(message.data, message.len);
     50 	unreachable();
     51 }
     52 
     53 // TODO(rnp): none of this belongs here, but will be removed
     54 // once vulkan migration is complete
     55 #define GLFW_VISIBLE 0x00020004
     56 void   glfwWindowHint(i32, i32);
     57 iptr   glfwCreateWindow(i32, i32, char *, iptr, iptr);
     58 void   glfwMakeContextCurrent(iptr);
     59 iptr   glfwGetGLXContext(iptr);
     60 iptr   glfwGetWGLContext(iptr);
     61 void * glfwGetProcAddress(char *);
     62 
     63 #if OS_WINDOWS
     64 function iptr
     65 os_get_native_gl_context(iptr window)
     66 {
     67 	return glfwGetWGLContext(window);
     68 }
     69 #else
     70 function iptr
     71 os_get_native_gl_context(iptr window)
     72 {
     73 	return glfwGetGLXContext(window);
     74 }
     75 #endif
     76 
     77 function void
     78 gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, const void *userctx)
     79 {
     80 	Stream *e = (Stream *)userctx;
     81 	stream_append_s8s(e, s8("[OpenGL] "), (s8){.len = len, .data = (u8 *)msg}, s8("\n"));
     82 	os_console_log(e->data, e->widx);
     83 	stream_reset(e, 0);
     84 }
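/* This callback is registered with glDebugMessageCallback() in beamformer_init();
 * the userctx pointer handed back by the driver is the Stream allocated there, so
 * each message is formatted into that stream, flushed to the console, and the
 * stream is reset before the next callback. */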
     85 
     86 function void
     87 load_gl(Stream *err)
     88 {
     89 	#define X(name, ret, params) name = (name##_fn *)glfwGetProcAddress(#name);
     90 	OGLProcedureList
     91 	#undef X
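	/* Illustratively, a single X(name, ret, params) entry in OGLProcedureList
	 * expands under the macro above to something like:
	 *
	 *   glCreateBuffers = (glCreateBuffers_fn *)glfwGetProcAddress("glCreateBuffers");
	 *
	 * (sketch only; the real entry list and the _fn typedefs live elsewhere). Any
	 * pointer that comes back null is reported as missing at the end of this function. */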
     92 
     93 	/* NOTE: Gather information about the GPU */
     94 	{
     95 		char *vendor = (char *)glGetString(GL_VENDOR);
     96 		if (!vendor) {
     97 			stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
     98 			fatal(stream_to_s8(err));
     99 		}
    100 		/* TODO(rnp): str prefix of */
    101 		switch (vendor[0]) {
    102 		case 'A': gl_parameters.vendor_id = GLVendor_AMD;    break;
    103 		case 'I': gl_parameters.vendor_id = GLVendor_Intel;  break;
    104 		case 'N': gl_parameters.vendor_id = GLVendor_NVIDIA; break;
    105 		/* NOTE(rnp): freedreno */
    106 		case 'f': gl_parameters.vendor_id = GLVendor_ARM;    break;
    107 		/* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
    108 		case 'M': gl_parameters.vendor_id = GLVendor_ARM;    break;
    109 		default:
    110 			stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
    111 			fatal(stream_to_s8(err));
    112 		}
    113 
    114 		#define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl_parameters.name);
    115 		GL_PARAMETERS
    116 		#undef X
    117 	}
    118 
    119 #ifdef _DEBUG
    120 	{
    121 		s8 vendor = s8("vendor:");
    122 		i32 max_width = (i32)vendor.len;
    123 		#define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len;
    124 		GL_PARAMETERS
    125 		#undef X
    126 		max_width++;
    127 
    128 		stream_append_s8s(err, s8("---- GL Parameters ----\n"), vendor);
    129 		stream_pad(err, ' ', max_width - (i32)vendor.len);
    130 		switch (gl_parameters.vendor_id) {
    131 		case GLVendor_AMD:    stream_append_s8(err, s8("AMD"));    break;
    132 		case GLVendor_ARM:    stream_append_s8(err, s8("ARM"));    break;
    133 		case GLVendor_Intel:  stream_append_s8(err, s8("Intel"));  break;
    134 		case GLVendor_NVIDIA: stream_append_s8(err, s8("nVidia")); break;
    135 		}
    136 		stream_append_byte(err, '\n');
    137 
    138 		#define X(glname, name, suffix) \
    139 			stream_append_s8(err, s8(#name ":"));                     \
    140 			stream_pad(err, ' ', max_width - (i32)s8(#name ":").len); \
    141 			stream_append_i64(err, gl_parameters.name);               \
    142 			stream_append_s8(err, s8(suffix "\n"));
    143 		GL_PARAMETERS
    144 		#undef X
    145 		stream_append_s8(err, s8("-----------------------\n"));
    146 		os_console_log(err->data, err->widx);
    147 	}
    148 #endif
    149 
    150 	{
    151 		stream_reset(err, 0);
    152 		if (gl_parameters.max_ubo_size < (i32)sizeof(BeamformerParameters)) {
    153 			stream_append_s8(err, s8("GPU must support UBOs of at least "));
    154 			stream_append_i64(err, sizeof(BeamformerParameters));
    155 			stream_append_s8(err, s8(" bytes!\n"));
    156 		}
    157 
    158 		#define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n"));
    159 		OGLProcedureList
    160 		#undef X
    161 
    162 		if (err->widx) fatal(stream_to_s8(err));
    163 	}
    164 }
    165 
    166 function void
    167 beamformer_load_cuda_library(BeamformerCtx *ctx, OSLibrary cuda, Arena arena)
    168 {
    169 	/* TODO(rnp): (25.10.30) registering the rf buffer with CUDA is currently
    170 	 * causing a major performance regression. for now we are disabling its use
    171 	 * altogether. it will be reenabled once the issue can be fixed */
    172 	b32 result = 0 && gl_parameters.vendor_id == GLVendor_NVIDIA && ValidHandle(cuda);
    173 	if (result) {
    174 		Stream err = arena_stream(arena);
    175 
    176 		stream_append_s8(&err, beamformer_info("loading CUDA library functions"));
    177 		#define X(name, symname) cuda_## name = os_lookup_symbol(cuda, symname);
    178 		CUDALibraryProcedureList
    179 		#undef X
    180 
    181 		os_console_log(err.data, err.widx);
    182 	}
    183 
    184 	#define X(name, symname) if (!cuda_## name) cuda_## name = cuda_ ## name ## _stub;
    185 	CUDALibraryProcedureList
    186 	#undef X
    187 }
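/* Design note: any cuda_* pointer that was not resolved above (wrong vendor, no
 * library handle, or a missing symbol) is pointed at the matching cuda_*_stub, so
 * call sites can always call through the pointer without checking whether CUDA
 * support is actually present. */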
    188 
    189 function BeamformerRenderModel
    190 render_model_from_arrays(f32 *vertices, f32 *normals, i32 vertices_size, u16 *indices, i32 index_count)
    191 {
    192 	BeamformerRenderModel result = {0};
    193 
    194 	i32 buffer_size    = vertices_size * 2 + index_count * (i32)sizeof(u16);
    195 	i32 indices_offset = vertices_size * 2;
    196 	i32 indices_size   = index_count * (i32)sizeof(u16);
    197 
    198 	result.elements        = index_count;
    199 	result.elements_offset = indices_offset;
    200 
    201 	glCreateBuffers(1, &result.buffer);
    202 	glNamedBufferStorage(result.buffer, buffer_size, 0, GL_DYNAMIC_STORAGE_BIT);
    203 	glNamedBufferSubData(result.buffer, 0,              vertices_size, vertices);
    204 	glNamedBufferSubData(result.buffer, vertices_size,  vertices_size, normals);
    205 	glNamedBufferSubData(result.buffer, indices_offset, indices_size,  indices);
    206 
    207 	glCreateVertexArrays(1, &result.vao);
    208 	glVertexArrayVertexBuffer(result.vao, 0, result.buffer, 0,             3 * sizeof(f32));
    209 	glVertexArrayVertexBuffer(result.vao, 1, result.buffer, vertices_size, 3 * sizeof(f32));
    210 	glVertexArrayElementBuffer(result.vao, result.buffer);
    211 
    212 	glEnableVertexArrayAttrib(result.vao, 0);
    213 	glEnableVertexArrayAttrib(result.vao, 1);
    214 
    215 	glVertexArrayAttribFormat(result.vao, 0, 3, GL_FLOAT, 0, 0);
    216 	glVertexArrayAttribFormat(result.vao, 1, 3, GL_FLOAT, 0, (u32)vertices_size);
    217 
    218 	glVertexArrayAttribBinding(result.vao, 0, 0);
    219 	glVertexArrayAttribBinding(result.vao, 1, 0);
    220 
    221 	return result;
    222 }
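/* The packing produced above, for a model with V bytes of position data (normals
 * are assumed to occupy the same number of bytes) and I bytes of indices:
 *
 *   [ positions: 0 .. V ) [ normals: V .. 2V ) [ u16 indices: 2V .. 2V + I )
 *
 * VAO binding 0 reads positions and binding 1 reads normals (both as 3 x f32),
 * and the element buffer points at the same GL buffer, so one allocation backs
 * the whole render model. */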
    223 
    224 function void
    225 worker_thread_sleep(GLWorkerThreadContext *ctx, BeamformerSharedMemory *sm)
    226 {
    227 	for (;;) {
    228 		i32 expected = 0;
    229 		if (atomic_cas_u32(&ctx->sync_variable, &expected, 1) ||
    230 		    atomic_load_u32(&sm->live_imaging_parameters.active))
    231 		{
    232 			break;
    233 		}
    234 
    235 		/* TODO(rnp): clean this crap up; we shouldn't need two values to communicate this */
    236 		atomic_store_u32(&ctx->awake, 0);
    237 		os_wait_on_address(&ctx->sync_variable, 1, (u32)-1);
    238 		atomic_store_u32(&ctx->awake, 1);
    239 	}
    240 }
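/* A note on the two flags used above: awake is dropped to 0 only while the thread
 * is parked in os_wait_on_address(), which is what beamformer_debug_hot_reload()
 * spins on; sync_variable is the address the producing side flips and wakes to
 * hand the worker new work (the exact handshake value is owned by the callers).
 * The live-imaging flag short-circuits the wait so the thread keeps running
 * frames while live imaging is active. */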
    241 
    242 function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point)
    243 {
    244 	GLWorkerThreadContext *ctx = user_context;
    245 
    246 	glfwMakeContextCurrent(ctx->window_handle);
    247 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
    248 
    249 	BeamformerCtx *beamformer = (BeamformerCtx *)ctx->user_context;
    250 	glCreateQueries(GL_TIME_ELAPSED, countof(beamformer->compute_context.shader_timer_ids),
    251 	                beamformer->compute_context.shader_timer_ids);
    252 
    253 	for (;;) {
    254 		worker_thread_sleep(ctx, beamformer->shared_memory);
    255 		asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg);
    256 		beamformer_complete_compute(ctx->user_context, &ctx->arena, ctx->gl_context);
    257 	}
    258 
    259 	unreachable();
    260 
    261 	return 0;
    262 }
    263 
    264 function OS_THREAD_ENTRY_POINT_FN(beamformer_upload_entry_point)
    265 {
    266 	GLWorkerThreadContext *ctx = user_context;
    267 	glfwMakeContextCurrent(ctx->window_handle);
    268 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
    269 
    270 	BeamformerUploadThreadContext *up = (typeof(up))ctx->user_context;
    271 	glCreateQueries(GL_TIMESTAMP, 1, &up->rf_buffer->data_timestamp_query);
    272 	/* NOTE(rnp): start this here so we don't have to worry about it being started or not */
    273 	glQueryCounter(up->rf_buffer->data_timestamp_query, GL_TIMESTAMP);
    274 
    275 	for (;;) {
    276 		worker_thread_sleep(ctx, up->shared_memory);
    277 		beamformer_rf_upload(up);
    278 	}
    279 
    280 	unreachable();
    281 
    282 	return 0;
    283 }
    284 
    285 BEAMFORMER_EXPORT void
    286 beamformer_init(BeamformerInput *input)
    287 {
    288 	Arena  memory        = arena_from_memory(input->memory, input->memory_size);
    289 	Arena  compute_arena = sub_arena_end(&memory, MB(2), KB(4));
    290 	Arena  upload_arena  = sub_arena_end(&memory, KB(4), KB(4));
    291 	Arena  ui_arena      = sub_arena_end(&memory, MB(2), KB(4));
    292 	Stream error         = arena_stream(sub_arena_end(&memory, MB(1), 1));
    293 	BeamformerCtx *ctx   = push_struct(&memory, BeamformerCtx);
    294 
    295 	Arena scratch = {.beg = memory.end - 4096L, .end = memory.end};
    296 	memory.end = scratch.beg;
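	/* Rough memory map after the carving above, assuming sub_arena_end() takes its
	 * allocation from the tail of the parent arena:
	 *
	 *   [ BeamformerCtx | general allocations ... | scratch 4K ]
	 *   [ error 1M ][ ui 2M ][ upload 4K ][ compute 2M ]   <- fixed at the tail
	 *
	 * Everything later pushed onto memory comes out of the general region. */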
    297 
    298 	ctx->window_size = (iv2){{1280, 840}};
    299 	ctx->error_stream = error;
    300 	ctx->ui_backing_store = ui_arena;
    301 
    302 	ctx->compute_worker.arena  = compute_arena;
    303 	ctx->upload_worker.arena   = upload_arena;
    304 
    305 	beamformer_load_cuda_library(ctx, input->cuda_library_handle, memory);
    306 
    307 	SetConfigFlags(FLAG_VSYNC_HINT|FLAG_WINDOW_ALWAYS_RUN);
    308 	InitWindow(ctx->window_size.w, ctx->window_size.h, "OGL Beamformer");
    309 	/* NOTE: do this after window creation so that the window starts out floating in a tiling wm */
    310 	SetWindowState(FLAG_WINDOW_RESIZABLE);
    311 	SetWindowMinSize(840, ctx->window_size.h);
    312 
    313 	glfwWindowHint(GLFW_VISIBLE, 0);
    314 	iptr raylib_window_handle = (iptr)GetPlatformWindowHandle();
    315 
    316 	load_gl(&ctx->error_stream);
    317 
    318 	ctx->beamform_work_queue  = push_struct(&memory, BeamformWorkQueue);
    319 	ctx->compute_shader_stats = push_struct(&memory, ComputeShaderStats);
    320 	ctx->compute_timing_table = push_struct(&memory, ComputeTimingTable);
    321 
    322 	ctx->shared_memory = input->shared_memory;
    323 	if (!ctx->shared_memory) fatal(s8("Get more ram lol\n"));
    324 	zero_struct(ctx->shared_memory);
    325 
    326 	ctx->shared_memory->version = BEAMFORMER_SHARED_MEMORY_VERSION;
    327 	ctx->shared_memory->reserved_parameter_blocks = 1;
    328 
    329 	/* TODO(rnp): I'm not sure if its a good idea to pre-reserve a bunch of semaphores
    330 	 * on w32 but that's what we are doing for now */
    331 	#if OS_WINDOWS
    332 	{
    333 		Stream sb = arena_stream(memory);
    334 		stream_append(&sb, input->shared_memory_name, input->shared_memory_name_length);
    335 		stream_append_s8(&sb, s8("_lock_"));
    336 		i32 start_index = sb.widx;
    337 		for EachElement(os_w32_shared_memory_semaphores, it) {
    338 			stream_reset(&sb, start_index);
    339 			stream_append_u64(&sb, it);
    340 			stream_append_byte(&sb, 0);
    341 			os_w32_shared_memory_semaphores[it] = os_w32_create_semaphore((c8 *)sb.data, 1, 1);
    342 			if InvalidHandle(os_w32_shared_memory_semaphores[it])
    343 				fatal(beamformer_info("init: failed to create w32 shared memory semaphore\n"));
    344 
    345 			/* NOTE(rnp): hacky garbage because CreateSemaphore will just open an existing
    346 			 * semaphore without any indication. Sometimes the other side of the shared memory
    347 	 * will provide incorrect parameters or will otherwise fail and it's faster to
    348 			 * restart this program than to get that application to release the semaphores */
    349 			/* TODO(rnp): figure out something more robust */
    350 			os_w32_semaphore_release(os_w32_shared_memory_semaphores[it], 1);
    351 		}
    352 	}
    353 	#endif
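	/* The names built in the loop above come out as "<shared_memory_name>_lock_<n>"
	 * with n counting over the shared-memory semaphore slots; the client library is
	 * assumed to construct the same names when it opens these semaphores from the
	 * other side. */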
    354 
    355 	BeamformerComputeContext *cs = &ctx->compute_context;
    356 
    357 	GLWorkerThreadContext *worker = &ctx->compute_worker;
    358 	/* TODO(rnp): we should lock this down after we have something working */
    359 	worker->user_context  = (iptr)ctx;
    360 	worker->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
    361 	worker->handle        = os_create_thread("[compute]", worker, compute_worker_thread_entry_point);
    362 
    363 	GLWorkerThreadContext         *upload = &ctx->upload_worker;
    364 	BeamformerUploadThreadContext *upctx  = push_struct(&memory, typeof(*upctx));
    365 	upload->user_context = (iptr)upctx;
    366 	upctx->rf_buffer     = &cs->rf_buffer;
    367 	upctx->shared_memory = ctx->shared_memory;
    368 	upctx->compute_timing_table = ctx->compute_timing_table;
    369 	upctx->compute_worker_sync  = &ctx->compute_worker.sync_variable;
    370 	upload->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
    371 	upload->handle        = os_create_thread("[upload]", upload, beamformer_upload_entry_point);
    372 
    373 	glfwMakeContextCurrent(raylib_window_handle);
    374 
    375 	/* NOTE: set up OpenGL debug logging */
    376 	Stream *gl_error_stream = push_struct(&memory, Stream);
    377 	*gl_error_stream        = stream_alloc(&memory, 1024);
    378 	glDebugMessageCallback(gl_debug_logger, gl_error_stream);
    379 #ifdef _DEBUG
    380 	glEnable(GL_DEBUG_OUTPUT);
    381 #endif
    382 
    383 	if (!BakeShaders)
    384 	{
    385 		for EachElement(beamformer_reloadable_compute_shader_info_indices, it) {
    386 			i32   index = beamformer_reloadable_compute_shader_info_indices[it];
    387 			Arena temp  = scratch;
    388 			s8 file = push_s8_from_parts(&temp, os_path_separator(), s8("shaders"),
    389 			                             beamformer_reloadable_shader_files[index]);
    390 			BeamformerFileReloadContext *frc = push_struct(&memory, typeof(*frc));
    391 			frc->kind                = BeamformerFileReloadKind_ComputeShader;
    392 			frc->compute_shader_kind = beamformer_reloadable_shader_kinds[index];
    393 			os_add_file_watch((char *)file.data, file.len, frc);
    394 		}
    395 	}
    396 
    397 	FrameViewRenderContext *fvr = &ctx->frame_view_render_context;
    398 	glCreateFramebuffers(countof(fvr->framebuffers), fvr->framebuffers);
    399 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[0], s8("Frame View Framebuffer"));
    400 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer"));
    401 
    402 	glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers);
    403 	i32 msaa_samples = gl_parameters.vendor_id == GLVendor_ARM? 4 : 8;
    404 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8,
    405 	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
    406 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
    407 	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
    408 
    409 	static_assert(countof(beamformer_reloadable_render_shader_info_indices) == 1,
    410 	              "only a single render shader is currently handled");
    411 	i32 render_rsi_index = beamformer_reloadable_render_shader_info_indices[0];
    412 
    413 	// TODO(rnp): leaks when BakeShaders is true
    414 	Arena *arena = &memory;
    415 	BeamformerShaderReloadContext *render_3d = push_struct(arena, typeof(*render_3d));
    416 	render_3d->reloadable_info_index = render_rsi_index;
    417 	render_3d->gl_type = GL_FRAGMENT_SHADER;
    418 	render_3d->header  = s8(""
    419 	"layout(location = 0) in  vec3 normal;\n"
    420 	"layout(location = 1) in  vec3 texture_coordinate;\n\n"
    421 	"layout(location = 2) in  vec3 test_texture_coordinate;\n\n"
    422 	"layout(location = 0) out vec4 out_colour;\n\n"
    423 	"layout(location = " str(FRAME_VIEW_DYNAMIC_RANGE_LOC) ") uniform float u_db_cutoff = 60;\n"
    424 	"layout(location = " str(FRAME_VIEW_THRESHOLD_LOC)     ") uniform float u_threshold = 40;\n"
    425 	"layout(location = " str(FRAME_VIEW_GAMMA_LOC)         ") uniform float u_gamma     = 1;\n"
    426 	"layout(location = " str(FRAME_VIEW_LOG_SCALE_LOC)     ") uniform bool  u_log_scale;\n"
    427 	"layout(location = " str(FRAME_VIEW_BB_COLOUR_LOC)     ") uniform vec4  u_bb_colour   = vec4(" str(FRAME_VIEW_BB_COLOUR) ");\n"
    428 	"layout(location = " str(FRAME_VIEW_BB_FRACTION_LOC)   ") uniform float u_bb_fraction = " str(FRAME_VIEW_BB_FRACTION) ";\n"
    429 	"layout(location = " str(FRAME_VIEW_SOLID_BB_LOC)      ") uniform bool  u_solid_bb;\n"
    430 	"\n"
    431 	"layout(binding = 0) uniform sampler3D u_texture;\n");
    432 
    433 	render_3d->link = push_struct(arena, typeof(*render_3d));
    434 	render_3d->link->reloadable_info_index = -1;
    435 	render_3d->link->gl_type = GL_VERTEX_SHADER;
    436 	render_3d->link->link    = render_3d;
    437 	render_3d->link->header  = s8(""
    438 	"layout(location = 0) in vec3 v_position;\n"
    439 	"layout(location = 1) in vec3 v_normal;\n"
    440 	"\n"
    441 	"layout(location = 0) out vec3 f_normal;\n"
    442 	"layout(location = 1) out vec3 f_texture_coordinate;\n"
    443 	"layout(location = 2) out vec3 f_orig_texture_coordinate;\n"
    444 	"\n"
    445 	"layout(location = " str(FRAME_VIEW_MODEL_MATRIX_LOC)  ") uniform mat4  u_model;\n"
    446 	"layout(location = " str(FRAME_VIEW_VIEW_MATRIX_LOC)   ") uniform mat4  u_view;\n"
    447 	"layout(location = " str(FRAME_VIEW_PROJ_MATRIX_LOC)   ") uniform mat4  u_projection;\n"
    448 	"\n"
    449 	"\n"
    450 	"void main()\n"
    451 	"{\n"
    452 	"\tvec3 pos = v_position;\n"
    453 	"\tf_orig_texture_coordinate = (2 * v_position + 1) / 2;\n"
    454 	//"\tif (v_position.y == -1) pos.x = clamp(v_position.x, -u_clip_fraction, u_clip_fraction);\n"
    455 	"\tvec3 tex_coord = (2 * pos + 1) / 2;\n"
    456 	"\tf_texture_coordinate = tex_coord.xzy;\n"
    457 	//"\tf_texture_coordinate = u_swizzle? tex_coord.xzy : tex_coord;\n"
    458 	//"\tf_normal    = normalize(mat3(u_model) * v_normal);\n"
    459 	"\tf_normal    = v_normal;\n"
    460 	"\tgl_Position = u_projection * u_view * u_model * vec4(pos, 1);\n"
    461 	"}\n");
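	/* The vertex stage above has reloadable_info_index == -1 and carries its whole
	 * body in the header string, so it does not correspond to any on-disk file; the
	 * fragment stage's header is presumably prepended to the reloadable source file
	 * named by beamformer_reloadable_shader_files[render_rsi_index] when it is
	 * compiled. */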
    462 
    463 	// TODO(rnp): this is probably not expected by the platform, refactor so that all
    464 	// needed context (eg. headers) are available outside of here and push initial load
    465 	// into ui_init
    466 	{
    467 		BeamformerFileReloadContext *frc = push_struct(&memory, typeof(*frc));
    468 		frc->kind                  = BeamformerFileReloadKind_Shader;
    469 		frc->shader_reload_context = render_3d;
    470 		input->event_queue[input->event_count++] = (BeamformerInputEvent){
    471 			.kind = BeamformerInputEventKind_FileEvent,
    472 			.file_watch_user_context = frc,
    473 		};
    474 
    475 		s8 render_file = {0};
    476 		if (!BakeShaders) {
    477 			render_file = push_s8_from_parts(&scratch, os_path_separator(), s8("shaders"),
    478 			                                 beamformer_reloadable_shader_files[render_rsi_index]);
    479 			os_add_file_watch((char *)render_file.data, render_file.len, frc);
    480 		}
    481 	}
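	/* Queuing the synthetic FileEvent above is intended to force the render shader
	 * to be built on the first frame step even when BakeShaders is set; the on-disk
	 * file watch is only registered in the non-baked case. */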
    482 
    483 	f32 unit_cube_vertices[] = {
    484 		 0.5f,  0.5f, -0.5f,
    485 		 0.5f,  0.5f, -0.5f,
    486 		 0.5f,  0.5f, -0.5f,
    487 		 0.5f, -0.5f, -0.5f,
    488 		 0.5f, -0.5f, -0.5f,
    489 		 0.5f, -0.5f, -0.5f,
    490 		 0.5f,  0.5f,  0.5f,
    491 		 0.5f,  0.5f,  0.5f,
    492 		 0.5f,  0.5f,  0.5f,
    493 		 0.5f, -0.5f,  0.5f,
    494 		 0.5f, -0.5f,  0.5f,
    495 		 0.5f, -0.5f,  0.5f,
    496 		-0.5f,  0.5f, -0.5f,
    497 		-0.5f,  0.5f, -0.5f,
    498 		-0.5f,  0.5f, -0.5f,
    499 		-0.5f, -0.5f, -0.5f,
    500 		-0.5f, -0.5f, -0.5f,
    501 		-0.5f, -0.5f, -0.5f,
    502 		-0.5f,  0.5f,  0.5f,
    503 		-0.5f,  0.5f,  0.5f,
    504 		-0.5f,  0.5f,  0.5f,
    505 		-0.5f, -0.5f,  0.5f,
    506 		-0.5f, -0.5f,  0.5f,
    507 		-0.5f, -0.5f,  0.5f
    508 	};
    509 	f32 unit_cube_normals[] = {
    510 		 0.0f,  0.0f, -1.0f,
    511 		 0.0f,  1.0f,  0.0f,
    512 		 1.0f,  0.0f,  0.0f,
    513 		 0.0f,  0.0f, -1.0f,
    514 		 0.0f, -1.0f,  0.0f,
    515 		 1.0f,  0.0f,  0.0f,
    516 		 0.0f,  0.0f,  1.0f,
    517 		 0.0f,  1.0f,  0.0f,
    518 		 1.0f,  0.0f,  0.0f,
    519 		 0.0f,  0.0f,  1.0f,
    520 		 0.0f, -1.0f,  0.0f,
    521 		 1.0f,  0.0f,  0.0f,
    522 		 0.0f,  0.0f, -1.0f,
    523 		 0.0f,  1.0f,  0.0f,
    524 		-1.0f,  0.0f,  0.0f,
    525 		 0.0f,  0.0f, -1.0f,
    526 		 0.0f, -1.0f,  0.0f,
    527 		-1.0f,  0.0f,  0.0f,
    528 		 0.0f,  0.0f,  1.0f,
    529 		 0.0f,  1.0f,  0.0f,
    530 		-1.0f,  0.0f,  0.0f,
    531 		 0.0f,  0.0f,  1.0f,
    532 		 0.0f, -1.0f,  0.0f,
    533 		-1.0f,  0.0f,  0.0f
    534 	};
    535 	u16 unit_cube_indices[] = {
    536 		1,  13, 19,
    537 		1,  19, 7,
    538 		9,  6,  18,
    539 		9,  18, 21,
    540 		23, 20, 14,
    541 		23, 14, 17,
    542 		16, 4,  10,
    543 		16, 10, 22,
    544 		5,  2,  8,
    545 		5,  8,  11,
    546 		15, 12, 0,
    547 		15, 0,  3
    548 	};
    549 
    550 	cs->unit_cube_model = render_model_from_arrays(unit_cube_vertices, unit_cube_normals,
    551 	                                               sizeof(unit_cube_vertices),
    552 	                                               unit_cube_indices, countof(unit_cube_indices));
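	/* The cube data above lists every corner three times (24 positions, 24 normals)
	 * so each face can carry a flat per-face normal; the 36 indices then form the
	 * 12 triangles of the cube that the frame-view shaders use to draw the volume
	 * texture. */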
    553 
    554 	memory.end = scratch.end;
    555 	ctx->arena = memory;
    556 	ctx->state = BeamformerState_Running;
    557 }
    558 
    559 BEAMFORMER_EXPORT void
    560 beamformer_terminate(BeamformerInput *input)
    561 {
    562 	/* NOTE(rnp): work around pebkac when the beamformer is closed while we are doing live
    563 	 * imaging. if the Verasonics is blocked in an external function (calling the library
    564 	 * to start compute) it is impossible for us to get it to shut down properly, which
    565 	 * will sometimes result in us needing to power cycle the system. set the shared memory
    566 	 * into an error state and release the dispatch lock so that future calls will error
    567 	 * instead of blocking.
    568 	 */
    569 	BeamformerCtx *          ctx = BeamformerContextMemory(input->memory);
    570 	BeamformerSharedMemory * sm  = input->shared_memory;
    571 	if (ctx->state != BeamformerState_Terminated) {
    572 		if (sm) {
    573 			BeamformerSharedMemoryLockKind lock = BeamformerSharedMemoryLockKind_DispatchCompute;
    574 			atomic_store_u32(&sm->invalid, 1);
    575 			atomic_store_u32(&sm->external_work_queue.ridx, sm->external_work_queue.widx);
    576 			DEBUG_DECL(if (sm->locks[lock])) {
    577 				beamformer_shared_memory_release_lock(sm, (i32)lock);
    578 			}
    579 
    580 			atomic_or_u32(&sm->live_imaging_dirty_flags, BeamformerLiveImagingDirtyFlags_StopImaging);
    581 		}
    582 
    583 		beamformer_debug_ui_deinit(ctx);
    584 
    585 		ctx->state = BeamformerState_Terminated;
    586 	}
    587 }
    588 
    589 BEAMFORMER_EXPORT u32
    590 beamformer_should_close(BeamformerInput *input)
    591 {
    592 	BeamformerCtx * ctx = BeamformerContextMemory(input->memory);
    593 	if (ctx->state == BeamformerState_ShouldClose)
    594 		beamformer_terminate(input);
    595 	return ctx->state == BeamformerState_Terminated;
    596 }