ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

static.c (22021B)


      1 /* See LICENSE for license details. */
      2 
      3 /* NOTE(rnp): magic variables to force discrete GPU usage on laptops with multiple devices */
      4 EXPORT i32 NvOptimusEnablement = 1;
      5 EXPORT i32 AmdPowerXpressRequestHighPerformance = 1;
      6 
      7 #ifndef _DEBUG
      8 
      9 #include "beamformer.c"
     10 #define debug_init(...)
     11 
     12 #else
     13 
     14 global void *debug_lib;
     15 
     16 #define DEBUG_ENTRY_POINTS \
     17 	X(beamformer_debug_ui_deinit)  \
     18 	X(beamformer_complete_compute) \
     19 	X(beamformer_frame_step)       \
     20 	X(beamformer_reload_shader)    \
     21 	X(beamformer_rf_upload)
     22 
     23 #define X(name) global name ##_fn *name;
     24 DEBUG_ENTRY_POINTS
     25 #undef X
     26 
/* NOTE: file-watch callback that hot-reloads the main executable's debug library.
 * `os`, `path`, `user_data`, and `arena` are supplied by the FILE_WATCH_CALLBACK_FN
 * macro's parameter list (defined elsewhere — confirm exact signature there).
 * Re-resolves every symbol in DEBUG_ENTRY_POINTS from the freshly loaded library. */
function FILE_WATCH_CALLBACK_FN(debug_reload)
{
	BeamformerInput *input = (BeamformerInput *)user_data;
	Stream err             = arena_stream(arena);

	/* NOTE(rnp): spin until compute thread finishes its work (we will probably
	 * never reload while compute is in progress but just incase). */
	spin_wait(!atomic_load_u32(&os->compute_worker.asleep));
	spin_wait(!atomic_load_u32(&os->upload_worker.asleep));

	/* unload old library before loading the new copy; lookups below repopulate
	 * the global function pointers declared via DEBUG_ENTRY_POINTS */
	os_unload_library(debug_lib);
	debug_lib = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, &err);

	#define X(name) name = os_lookup_dynamic_symbol(debug_lib, #name, &err);
	DEBUG_ENTRY_POINTS
	#undef X

	stream_append_s8(&err, s8("Reloaded Main Executable\n"));
	os_write_file(os->error_handle, stream_to_s8(&err));

	/* signal the main loop that code was swapped out from under it */
	input->executable_reloaded = 1;

	return 1;
}
     51 
/* NOTE: debug-build initialization: registers a file watch on the debug library
 * (so edits trigger debug_reload), performs an initial load, and — if RenderDoc
 * is already injected into the process — wires up its frame-capture entry points.
 * `input` is forwarded opaquely to debug_reload as its user_data. */
function void
debug_init(OS *os, iptr input, Arena *arena)
{
	os_add_file_watch(os, arena, s8(OS_DEBUG_LIB_NAME), debug_reload, input);
	/* initial symbol load; empty path since the watch callback ignores it here */
	debug_reload(os, s8(""), input, *arena);

	Stream err = arena_stream(*arena);
	/* RenderDoc is only looked up, never loaded: os_get_module with flag 0
	 * presumably returns a handle only if the module is already resident —
	 * TODO confirm against os_get_module's contract */
	void *rdoc = os_get_module(OS_RENDERDOC_SONAME, 0);
	if (rdoc) {
		renderdoc_get_api_fn *get_api = os_lookup_dynamic_symbol(rdoc, "RENDERDOC_GetAPI", &err);
		if (get_api) {
			RenderDocAPI *api = 0;
			/* 10600 == RenderDoc API version 1.6.0 */
			if (get_api(10600, (void **)&api)) {
				os->start_frame_capture = RENDERDOC_START_FRAME_CAPTURE(api);
				os->end_frame_capture   = RENDERDOC_END_FRAME_CAPTURE(api);
				stream_append_s8(&err, s8("loaded: " OS_RENDERDOC_SONAME "\n"));
			}
		}
	}

	os_write_file(os->error_handle, stream_to_s8(&err));
}
     74 
     75 #endif /* _DEBUG */
     76 
/* NOTE: userdata handed to glDebugMessageCallback; owns a scratch stream used to
 * format each GL debug message before writing it to the OS error handle */
struct gl_debug_ctx {
	Stream stream;          /* reset after every message (see gl_debug_logger) */
	iptr   os_error_handle; /* destination for formatted messages */
};
     81 
     82 function void
     83 gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, const void *userctx)
     84 {
     85 	struct gl_debug_ctx *ctx = (struct gl_debug_ctx *)userctx;
     86 	Stream *e = &ctx->stream;
     87 	stream_append_s8s(e, s8("[OpenGL] "), (s8){.len = len, .data = (u8 *)msg}, s8("\n"));
     88 	os_write_file(ctx->os_error_handle, stream_to_s8(e));
     89 	stream_reset(e, 0);
     90 }
     91 
/* NOTE: queries the GL vendor string and all integer limits listed in the
 * GL_PARAMETERS X-macro into `gl`. Fatal (via os_fatal) if the vendor cannot
 * be determined or is unrecognized. Must be called with a current GL context. */
function void
get_gl_params(GLParams *gl, Stream *err)
{
	char *vendor = (char *)glGetString(GL_VENDOR);
	if (!vendor) {
		stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
		os_fatal(stream_to_s8(err));
	}
	/* TODO(rnp): str prefix of */
	/* NOTE: vendor is classified by first character only — 'A'MD, 'I'ntel,
	 * 'N'VIDIA; see notes below for the two aliases mapped onto ARM */
	switch (vendor[0]) {
	case 'A': gl->vendor_id = GL_VENDOR_AMD;    break;
	case 'I': gl->vendor_id = GL_VENDOR_INTEL;  break;
	case 'N': gl->vendor_id = GL_VENDOR_NVIDIA; break;
	/* NOTE(rnp): freedreno */
	case 'f': gl->vendor_id = GL_VENDOR_ARM;    break;
	/* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
	case 'M': gl->vendor_id = GL_VENDOR_ARM;    break;
	default:
		stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
		os_fatal(stream_to_s8(err));
	}

	/* fetch every limit declared in GL_PARAMETERS into the matching field */
	#define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl->name);
	GL_PARAMETERS
	#undef X
}
    118 
    119 function void
    120 validate_gl_requirements(GLParams *gl, Arena a)
    121 {
    122 	Stream s = arena_stream(a);
    123 
    124 	if (gl->max_ubo_size < (i32)sizeof(BeamformerParameters)) {
    125 		stream_append_s8(&s, s8("GPU must support UBOs of at least "));
    126 		stream_append_i64(&s, sizeof(BeamformerParameters));
    127 		stream_append_s8(&s, s8(" bytes!\n"));
    128 	}
    129 
    130 	#define X(name, ret, params) if (!name) stream_append_s8s(&s, s8("missing required GL function:"), s8(#name), s8("\n"));
    131 	OGLProcedureList
    132 	#undef X
    133 
    134 	if (s.widx) os_fatal(stream_to_s8(&s));
    135 }
    136 
    137 function void
    138 dump_gl_params(GLParams *gl, Arena a, OS *os)
    139 {
    140 #ifdef _DEBUG
    141 	s8 vendor = s8("vendor:");
    142 	i32 max_width = (i32)vendor.len;
    143 	#define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len;
    144 	GL_PARAMETERS
    145 	#undef X
    146 	max_width++;
    147 
    148 	Stream s = arena_stream(a);
    149 	stream_append_s8s(&s, s8("---- GL Parameters ----\n"), vendor);
    150 	stream_pad(&s, ' ', max_width - (i32)vendor.len);
    151 	switch (gl->vendor_id) {
    152 	case GL_VENDOR_AMD:    stream_append_s8(&s, s8("AMD\n"));    break;
    153 	case GL_VENDOR_ARM:    stream_append_s8(&s, s8("ARM\n"));    break;
    154 	case GL_VENDOR_INTEL:  stream_append_s8(&s, s8("Intel\n"));  break;
    155 	case GL_VENDOR_NVIDIA: stream_append_s8(&s, s8("nVidia\n")); break;
    156 	}
    157 
    158 	#define X(glname, name, suffix) \
    159 		stream_append_s8(&s, s8(#name ":"));                     \
    160 		stream_pad(&s, ' ', max_width - (i32)s8(#name ":").len); \
    161 		stream_append_i64(&s, gl->name);                         \
    162 		stream_append_s8(&s, s8(suffix));                        \
    163 		stream_append_byte(&s, '\n');
    164 	GL_PARAMETERS
    165 	#undef X
    166 	stream_append_s8(&s, s8("-----------------------\n"));
    167 	os_write_file(os->error_handle, stream_to_s8(&s));
    168 #endif
    169 }
    170 
/* NOTE: file-watch callback for directly-reloaded shaders: resolves the shader
 * kind from the context's reloadable index and forwards to
 * beamformer_reload_shader with the human-readable shader name for logging.
 * `os`, `path`, `user_data`, `arena` come from the FILE_WATCH_CALLBACK_FN macro. */
function FILE_WATCH_CALLBACK_FN(reload_shader)
{
	ShaderReloadContext  *ctx  = (typeof(ctx))user_data;
	BeamformerShaderKind  kind = beamformer_reloadable_shader_kinds[ctx->reloadable_info_index];
	return beamformer_reload_shader(os, path, ctx, arena, beamformer_shader_names[kind]);
}
    177 
/* NOTE: userdata for reload_shader_indirect: identifies which shader to reload
 * and which beamformer's work queue the reload request should be pushed onto */
typedef struct {
	BeamformerCtx        *beamformer;
	BeamformerShaderKind  shader;
} BeamformerShaderReloadIndirectContext;
    182 
    183 function FILE_WATCH_CALLBACK_FN(reload_shader_indirect)
    184 {
    185 	BeamformerShaderReloadIndirectContext *rsi = (typeof(rsi))user_data;
    186 	BeamformerCtx *ctx = rsi->beamformer;
    187 	BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
    188 	if (work) {
    189 		work->kind = BeamformerWorkKind_ReloadShader,
    190 		work->reload_shader = rsi->shader;
    191 		beamform_work_queue_push_commit(ctx->beamform_work_queue);
    192 		os_wake_waiters(&os->compute_worker.sync_variable);
    193 	}
    194 	return 1;
    195 }
    196 
/* NOTE: file-watch callback that (re)loads the optional CUDA helper library on
 * NVIDIA systems and resolves the CUDALibraryProcedureList symbols. Any symbol
 * that fails to resolve — or everything, when loading is skipped — falls back
 * to the corresponding cuda_*_stub so callers never see a null pointer.
 * Returns nonzero only when the real library was loaded. */
function FILE_WATCH_CALLBACK_FN(load_cuda_library)
{
	/* handle persists across reloads so the previous copy can be unloaded */
	local_persist void *cuda_library_handle;

	GLParams *gl = (typeof(gl))user_data;
	/* TODO(rnp): (25.10.30) registering the rf buffer with CUDA is currently
	 * causing a major performance regression. for now we are disabling its use
	 * altogether. it will be reenabled once the issue can be fixed */
	/* NOTE: the leading `0 &&` is the deliberate kill-switch described above */
	b32 result = 0 && gl->vendor_id == GL_VENDOR_NVIDIA && os_file_exists((c8 *)path.data);
	if (result) {
		Stream err = arena_stream(arena);

		stream_append_s8(&err, s8("loading CUDA library: " OS_CUDA_LIB_NAME "\n"));
		os_unload_library(cuda_library_handle);
		cuda_library_handle = os_load_library((c8 *)path.data, OS_CUDA_LIB_TEMP_NAME, &err);
		#define X(name, symname) cuda_## name = os_lookup_dynamic_symbol(cuda_library_handle, symname, &err);
		CUDALibraryProcedureList
		#undef X

		os_write_file(os->error_handle, stream_to_s8(&err));
	}

	/* backfill stubs for anything left unresolved */
	#define X(name, symname) if (!cuda_## name) cuda_## name = cuda_ ## name ## _stub;
	CUDALibraryProcedureList
	#undef X

	return result;
}
    225 
    226 function BeamformerRenderModel
    227 render_model_from_arrays(f32 *vertices, f32 *normals, i32 vertices_size, u16 *indices, i32 index_count)
    228 {
    229 	BeamformerRenderModel result = {0};
    230 
    231 	i32 buffer_size    = vertices_size * 2 + index_count * (i32)sizeof(u16);
    232 	i32 indices_offset = vertices_size * 2;
    233 	i32 indices_size   = index_count * (i32)sizeof(u16);
    234 
    235 	result.elements        = index_count;
    236 	result.elements_offset = indices_offset;
    237 
    238 	glCreateBuffers(1, &result.buffer);
    239 	glNamedBufferStorage(result.buffer, buffer_size, 0, GL_DYNAMIC_STORAGE_BIT);
    240 	glNamedBufferSubData(result.buffer, 0,              vertices_size, vertices);
    241 	glNamedBufferSubData(result.buffer, vertices_size,  vertices_size, normals);
    242 	glNamedBufferSubData(result.buffer, indices_offset, indices_size,  indices);
    243 
    244 	glCreateVertexArrays(1, &result.vao);
    245 	glVertexArrayVertexBuffer(result.vao, 0, result.buffer, 0,             3 * sizeof(f32));
    246 	glVertexArrayVertexBuffer(result.vao, 1, result.buffer, vertices_size, 3 * sizeof(f32));
    247 	glVertexArrayElementBuffer(result.vao, result.buffer);
    248 
    249 	glEnableVertexArrayAttrib(result.vao, 0);
    250 	glEnableVertexArrayAttrib(result.vao, 1);
    251 
    252 	glVertexArrayAttribFormat(result.vao, 0, 3, GL_FLOAT, 0, 0);
    253 	glVertexArrayAttribFormat(result.vao, 1, 3, GL_FLOAT, 0, (u32)vertices_size);
    254 
    255 	glVertexArrayAttribBinding(result.vao, 0, 0);
    256 	glVertexArrayAttribBinding(result.vao, 1, 0);
    257 
    258 	return result;
    259 }
    260 
    261 #define GLFW_VISIBLE 0x00020004
    262 void glfwWindowHint(i32, i32);
    263 iptr glfwCreateWindow(i32, i32, char *, iptr, iptr);
    264 void glfwMakeContextCurrent(iptr);
    265 
/* NOTE: blocks a GL worker thread until it is woken. Two wake conditions:
 *  1) another thread set sync_variable to 1 (consumed here via CAS 1 -> 0
 *     semantics: we CAS expected 0 -> 1, so a successful CAS means we claimed
 *     the token... NOTE(review): the CAS writes 1 when it observes 0 — confirm
 *     intended token protocol against os_wake_waiters);
 *  2) live imaging is active, in which case the thread should not sleep.
 * While actually waiting, `asleep` is set so other code (e.g. debug_reload)
 * can spin until this thread is parked. */
function void
worker_thread_sleep(GLWorkerThreadContext *ctx, BeamformerSharedMemory *sm)
{
	for (;;) {
		i32 expected = 0;
		if (atomic_cas_u32(&ctx->sync_variable, &expected, 1) ||
		    atomic_load_u32(&sm->live_imaging_parameters.active))
		{
			break;
		}

		/* advertise that we are parked before blocking on the futex-style wait */
		atomic_store_u32(&ctx->asleep, 1);
		os_wait_on_value(&ctx->sync_variable, 1, (u32)-1);
		atomic_store_u32(&ctx->asleep, 0);
	}
}
    282 
/* NOTE: entry point for the compute worker thread. Takes ownership of its own
 * (hidden, 1x1) GLFW window's GL context, creates the per-shader elapsed-time
 * query objects, then loops forever: sleep until woken, poison the scratch
 * arena (ASan use-before-alloc detection), and run one round of compute work.
 * `_ctx` is supplied by the OS_THREAD_ENTRY_POINT_FN macro. Never returns. */
function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point)
{
	GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;

	glfwMakeContextCurrent(ctx->window_handle);
	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);

	BeamformerCtx *beamformer = (BeamformerCtx *)ctx->user_context;
	glCreateQueries(GL_TIME_ELAPSED, countof(beamformer->compute_context.shader_timer_ids),
	                beamformer->compute_context.shader_timer_ids);

	for (;;) {
		worker_thread_sleep(ctx, beamformer->shared_memory.region);
		asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg);
		beamformer_complete_compute(ctx->user_context, &ctx->arena, ctx->gl_context);
	}

	unreachable();

	return 0;
}
    304 
/* NOTE: entry point for the RF-data upload worker thread. Mirrors the compute
 * worker: owns its hidden window's GL context, creates the upload timestamp
 * query (and issues it once up-front so later reads never see an unstarted
 * query), then loops forever uploading RF data whenever woken.
 * `_ctx` is supplied by the OS_THREAD_ENTRY_POINT_FN macro. Never returns. */
function OS_THREAD_ENTRY_POINT_FN(upload_worker_thread_entry_point)
{
	GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;
	glfwMakeContextCurrent(ctx->window_handle);
	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);

	BeamformerUploadThreadContext *up = (typeof(up))ctx->user_context;
	glCreateQueries(GL_TIMESTAMP, 1, &up->rf_buffer->data_timestamp_query);
	/* NOTE(rnp): start this here so we don't have to worry about it being started or not */
	glQueryCounter(up->rf_buffer->data_timestamp_query, GL_TIMESTAMP);

	for (;;) {
		worker_thread_sleep(ctx, up->shared_memory->region);
		asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg);
		beamformer_rf_upload(up, ctx->arena);
	}

	unreachable();

	return 0;
}
    326 
/* NOTE: one-shot process initialization. Carves sub-arenas out of `memory`,
 * creates the window and GL contexts, spawns the compute and upload worker
 * threads, maps the shared memory IPC region, registers shader/library file
 * watches, and uploads the unit-cube render model. Outputs the allocated
 * context and input structs through o_ctx/o_input. Order here matters: GL
 * procedure loading must precede any gl* call, and worker windows must share
 * with the raylib window. */
function void
setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input)
{
	/* carve fixed sub-arenas for the worker threads and UI before anything
	 * else is pushed onto `memory` */
	Arena  compute_arena = sub_arena(memory, MB(2),  KB(4));
	Arena  upload_arena  = sub_arena(memory, KB(64), KB(4));
	Stream error         = stream_alloc(memory, MB(1));
	Arena  ui_arena      = sub_arena(memory, MB(2), KB(4));

	/* temporary scratch taken from the END of memory; restored at the bottom
	 * of this function (memory->end = scratch.end) */
	Arena scratch = {.beg = memory->end - 4096L, .end = memory->end};
	memory->end = scratch.beg;

	BeamformerCtx   *ctx   = *o_ctx   = push_struct(memory, typeof(*ctx));
	BeamformerInput *input = *o_input = push_struct(memory, typeof(*input));

	ctx->window_size = (iv2){{1280, 840}};
	ctx->error_stream = error;
	ctx->ui_backing_store = ui_arena;
	/* force a first-frame reload path in the main loop */
	input->executable_reloaded = 1;

	os_init(&ctx->os, memory);
	ctx->os.path_separator        = s8(OS_PATH_SEPARATOR);
	ctx->os.compute_worker.arena  = compute_arena;
	ctx->os.compute_worker.asleep = 1;
	ctx->os.upload_worker.arena   = upload_arena;
	ctx->os.upload_worker.asleep  = 1;

	/* no-op in release builds (see #ifndef _DEBUG stub above) */
	debug_init(&ctx->os, (iptr)input, memory);

	SetConfigFlags(FLAG_VSYNC_HINT|FLAG_WINDOW_ALWAYS_RUN);
	InitWindow(ctx->window_size.w, ctx->window_size.h, "OGL Beamformer");
	/* NOTE: do this after initing so that the window starts out floating in tiling wm */
	SetWindowState(FLAG_WINDOW_RESIZABLE);
	SetWindowMinSize(840, ctx->window_size.h);

	/* worker windows created below should be invisible */
	glfwWindowHint(GLFW_VISIBLE, 0);
	iptr raylib_window_handle = (iptr)GetPlatformWindowHandle();

	/* resolve every GL procedure before the first gl* call below */
	#define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name);
	OGLProcedureList
	#undef X
	/* NOTE: Gather information about the GPU */
	get_gl_params(&ctx->gl, &ctx->error_stream);
	dump_gl_params(&ctx->gl, *memory, &ctx->os);
	validate_gl_requirements(&ctx->gl, *memory);

	ctx->beamform_work_queue  = push_struct(memory, BeamformWorkQueue);
	ctx->compute_shader_stats = push_struct(memory, ComputeShaderStats);
	ctx->compute_timing_table = push_struct(memory, ComputeTimingTable);

	/* TODO(rnp): I'm not sure if its a good idea to pre-reserve a bunch of semaphores
	 * on w32 but thats what we are doing for now */
	u32 lock_count = (u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots;
	ctx->shared_memory = os_create_shared_memory_area(memory, OS_SHARED_MEMORY_NAME, lock_count,
	                                                  BEAMFORMER_SHARED_MEMORY_SIZE);
	BeamformerSharedMemory *sm = ctx->shared_memory.region;
	if (!sm) os_fatal(s8("Get more ram lol\n"));
	mem_clear(sm, 0, sizeof(*sm));

	sm->version = BEAMFORMER_SHARED_MEMORY_VERSION;
	sm->reserved_parameter_blocks = 1;

	BeamformerComputeContext *cs = &ctx->compute_context;

	/* compute worker: hidden 1x1 window sharing the raylib GL context */
	GLWorkerThreadContext *worker = &ctx->os.compute_worker;
	/* TODO(rnp): we should lock this down after we have something working */
	worker->user_context  = (iptr)ctx;
	worker->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
	worker->handle        = os_create_thread(*memory, (iptr)worker, s8("[compute]"),
	                                         compute_worker_thread_entry_point);

	/* upload worker: same pattern, with its own context struct */
	GLWorkerThreadContext         *upload = &ctx->os.upload_worker;
	BeamformerUploadThreadContext *upctx  = push_struct(memory, typeof(*upctx));
	upload->user_context = (iptr)upctx;
	upctx->rf_buffer     = &cs->rf_buffer;
	upctx->shared_memory = &ctx->shared_memory;
	upctx->compute_timing_table = ctx->compute_timing_table;
	upctx->compute_worker_sync  = &ctx->os.compute_worker.sync_variable;
	upload->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
	upload->handle        = os_create_thread(*memory, (iptr)upload, s8("[upload]"),
	                                         upload_worker_thread_entry_point);

	/* restore the main thread's context after the worker windows were created */
	glfwMakeContextCurrent(raylib_window_handle);

	/* only watch the CUDA library if the initial load succeeded (currently it
	 * never does — see the kill-switch in load_cuda_library) */
	if (load_cuda_library(&ctx->os, s8(OS_CUDA_LIB_NAME), (iptr)&ctx->gl, *memory))
		os_add_file_watch(&ctx->os, memory, s8(OS_CUDA_LIB_NAME), load_cuda_library, (iptr)&ctx->gl);

	/* NOTE: set up OpenGL debug logging */
	struct gl_debug_ctx *gl_debug_ctx = push_struct(memory, typeof(*gl_debug_ctx));
	gl_debug_ctx->stream          = stream_alloc(memory, 1024);
	gl_debug_ctx->os_error_handle = ctx->os.error_handle;
	glDebugMessageCallback(gl_debug_logger, gl_debug_ctx);
#ifdef _DEBUG
	glEnable(GL_DEBUG_OUTPUT);
#endif

	/* in non-baked builds, watch each reloadable compute shader on disk and
	 * queue an initial compile through the work queue */
	if (!BakeShaders)
	{
		for EachElement(beamformer_reloadable_compute_shader_info_indices, it) {
			i32   index = beamformer_reloadable_compute_shader_info_indices[it];
			Arena temp  = scratch;
			s8 file = push_s8_from_parts(&temp, s8(OS_PATH_SEPARATOR), s8("shaders"),
			                             beamformer_reloadable_shader_files[index]);

			BeamformerShaderReloadIndirectContext *rsi = push_struct(memory, typeof(*rsi));
			rsi->beamformer = ctx;
			rsi->shader     = beamformer_reloadable_shader_kinds[index];
			os_add_file_watch(&ctx->os, memory, file, reload_shader_indirect, (iptr)rsi);
			reload_shader_indirect(&ctx->os, file, (iptr)rsi, *memory);
		}
		os_wake_waiters(&worker->sync_variable);
	}

	/* frame-view offscreen rendering: MSAA colour + depth renderbuffers,
	 * with a second framebuffer used for resolving */
	FrameViewRenderContext *fvr = &ctx->frame_view_render_context;
	glCreateFramebuffers(countof(fvr->framebuffers), fvr->framebuffers);
	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[0], s8("Frame View Framebuffer"));
	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer"));

	glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers);
	/* lighter MSAA on ARM GPUs — presumably a performance concession; TODO confirm */
	i32 msaa_samples = ctx->gl.vendor_id == GL_VENDOR_ARM? 4 : 8;
	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8,
	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
	                                      FRAME_VIEW_RENDER_TARGET_SIZE);

	static_assert(countof(beamformer_reloadable_render_shader_info_indices) == 1,
	              "only a single render shader is currently handled");
	i32 render_rsi_index = beamformer_reloadable_render_shader_info_indices[0];

	/* baked builds only need these contexts during this function, so they can
	 * live in scratch; reloadable builds keep them alive in `memory` */
	Arena *arena = BakeShaders? &scratch : memory;
	ShaderReloadContext *render_3d = push_struct(arena, typeof(*render_3d));
	render_3d->beamformer_context    = ctx;
	render_3d->reloadable_info_index = render_rsi_index;
	render_3d->gl_type = GL_FRAGMENT_SHADER;
	/* GLSL header prepended to the 3D render fragment shader */
	render_3d->header  = s8(""
	"layout(location = 0) in  vec3 normal;\n"
	"layout(location = 1) in  vec3 texture_coordinate;\n\n"
	"layout(location = 2) in  vec3 test_texture_coordinate;\n\n"
	"layout(location = 0) out vec4 out_colour;\n\n"
	"layout(location = " str(FRAME_VIEW_DYNAMIC_RANGE_LOC) ") uniform float u_db_cutoff = 60;\n"
	"layout(location = " str(FRAME_VIEW_THRESHOLD_LOC)     ") uniform float u_threshold = 40;\n"
	"layout(location = " str(FRAME_VIEW_GAMMA_LOC)         ") uniform float u_gamma     = 1;\n"
	"layout(location = " str(FRAME_VIEW_LOG_SCALE_LOC)     ") uniform bool  u_log_scale;\n"
	"layout(location = " str(FRAME_VIEW_BB_COLOUR_LOC)     ") uniform vec4  u_bb_colour   = vec4(" str(FRAME_VIEW_BB_COLOUR) ");\n"
	"layout(location = " str(FRAME_VIEW_BB_FRACTION_LOC)   ") uniform float u_bb_fraction = " str(FRAME_VIEW_BB_FRACTION) ";\n"
	"layout(location = " str(FRAME_VIEW_SOLID_BB_LOC)      ") uniform bool  u_solid_bb;\n"
	"\n"
	"layout(binding = 0) uniform sampler3D u_texture;\n");

	/* linked vertex-shader context; reloadable_info_index -1 marks it as not
	 * independently reloadable, and the link pointers form a 2-cycle */
	render_3d->link = push_struct(arena, typeof(*render_3d));
	render_3d->link->reloadable_info_index = -1;
	render_3d->link->gl_type = GL_VERTEX_SHADER;
	render_3d->link->link    = render_3d;
	render_3d->link->header  = s8(""
	"layout(location = 0) in vec3 v_position;\n"
	"layout(location = 1) in vec3 v_normal;\n"
	"\n"
	"layout(location = 0) out vec3 f_normal;\n"
	"layout(location = 1) out vec3 f_texture_coordinate;\n"
	"layout(location = 2) out vec3 f_orig_texture_coordinate;\n"
	"\n"
	"layout(location = " str(FRAME_VIEW_MODEL_MATRIX_LOC)  ") uniform mat4  u_model;\n"
	"layout(location = " str(FRAME_VIEW_VIEW_MATRIX_LOC)   ") uniform mat4  u_view;\n"
	"layout(location = " str(FRAME_VIEW_PROJ_MATRIX_LOC)   ") uniform mat4  u_projection;\n"
	"\n"
	"\n"
	"void main()\n"
	"{\n"
	"\tvec3 pos = v_position;\n"
	"\tf_orig_texture_coordinate = (2 * v_position + 1) / 2;\n"
	//"\tif (v_position.y == -1) pos.x = clamp(v_position.x, -u_clip_fraction, u_clip_fraction);\n"
	"\tvec3 tex_coord = (2 * pos + 1) / 2;\n"
	"\tf_texture_coordinate = tex_coord.xzy;\n"
	//"\tf_texture_coordinate = u_swizzle? tex_coord.xzy : tex_coord;\n"
	//"\tf_normal    = normalize(mat3(u_model) * v_normal);\n"
	"\tf_normal    = v_normal;\n"
	"\tgl_Position = u_projection * u_view * u_model * vec4(pos, 1);\n"
	"}\n");

	/* compile the render shader now; additionally watch its file when not baked */
	s8 render_file = {0};
	if (!BakeShaders) {
		render_file = push_s8_from_parts(&scratch, s8(OS_PATH_SEPARATOR), s8("shaders"),
		                                 beamformer_reloadable_shader_files[render_rsi_index]);
		os_add_file_watch(&ctx->os, memory, render_file, reload_shader, (iptr)render_3d);
	}
	reload_shader(&ctx->os, render_file, (iptr)render_3d, *memory);

	/* unit cube: 24 vertices (each corner triplicated, once per adjacent face)
	 * so that every face can have its own flat normal */
	f32 unit_cube_vertices[] = {
		 0.5f,  0.5f, -0.5f,
		 0.5f,  0.5f, -0.5f,
		 0.5f,  0.5f, -0.5f,
		 0.5f, -0.5f, -0.5f,
		 0.5f, -0.5f, -0.5f,
		 0.5f, -0.5f, -0.5f,
		 0.5f,  0.5f,  0.5f,
		 0.5f,  0.5f,  0.5f,
		 0.5f,  0.5f,  0.5f,
		 0.5f, -0.5f,  0.5f,
		 0.5f, -0.5f,  0.5f,
		 0.5f, -0.5f,  0.5f,
		-0.5f,  0.5f, -0.5f,
		-0.5f,  0.5f, -0.5f,
		-0.5f,  0.5f, -0.5f,
		-0.5f, -0.5f, -0.5f,
		-0.5f, -0.5f, -0.5f,
		-0.5f, -0.5f, -0.5f,
		-0.5f,  0.5f,  0.5f,
		-0.5f,  0.5f,  0.5f,
		-0.5f,  0.5f,  0.5f,
		-0.5f, -0.5f,  0.5f,
		-0.5f, -0.5f,  0.5f,
		-0.5f, -0.5f,  0.5f
	};
	f32 unit_cube_normals[] = {
		 0.0f,  0.0f, -1.0f,
		 0.0f,  1.0f,  0.0f,
		 1.0f,  0.0f,  0.0f,
		 0.0f,  0.0f, -1.0f,
		 0.0f, -1.0f,  0.0f,
		 1.0f,  0.0f,  0.0f,
		 0.0f,  0.0f,  1.0f,
		 0.0f,  1.0f,  0.0f,
		 1.0f,  0.0f,  0.0f,
		 0.0f,  0.0f,  1.0f,
		 0.0f, -1.0f,  0.0f,
		 1.0f,  0.0f,  0.0f,
		 0.0f,  0.0f, -1.0f,
		 0.0f,  1.0f,  0.0f,
		-1.0f,  0.0f,  0.0f,
		 0.0f,  0.0f, -1.0f,
		 0.0f, -1.0f,  0.0f,
		-1.0f,  0.0f,  0.0f,
		 0.0f,  0.0f,  1.0f,
		 0.0f,  1.0f,  0.0f,
		-1.0f,  0.0f,  0.0f,
		 0.0f,  0.0f,  1.0f,
		 0.0f, -1.0f,  0.0f,
		-1.0f,  0.0f,  0.0f
	};
	u16 unit_cube_indices[] = {
		1,  13, 19,
		1,  19, 7,
		9,  6,  18,
		9,  18, 21,
		23, 20, 14,
		23, 14, 17,
		16, 4,  10,
		16, 10, 22,
		5,  2,  8,
		5,  8,  11,
		15, 12, 0,
		15, 0,  3
	};

	cs->unit_cube_model = render_model_from_arrays(unit_cube_vertices, unit_cube_normals,
	                                               sizeof(unit_cube_vertices),
	                                               unit_cube_indices, countof(unit_cube_indices));

	/* give the scratch region back to `memory` */
	memory->end = scratch.end;
}
    586 
/* NOTE: marks the shared-memory IPC region invalid during shutdown so external
 * clients (e.g. the Verasonics host) error out instead of blocking forever. */
function void
beamformer_invalidate_shared_memory(BeamformerCtx *ctx)
{
	/* NOTE(rnp): work around pebkac when the beamformer is closed while we are doing live
	 * imaging. if the verasonics is blocked in an external function (calling the library
	 * to start compute) it is impossible for us to get it to properly shut down which
	 * will sometimes result in us needing to power cycle the system. set the shared memory
	 * into an error state and release dispatch lock so that future calls will error instead
	 * of blocking.
	 */
	BeamformerSharedMemory *sm = ctx->shared_memory.region;
	BeamformerSharedMemoryLockKind lock = BeamformerSharedMemoryLockKind_DispatchCompute;
	atomic_store_u32(&sm->invalid, 1);
	/* drain the external work queue by snapping the read index to the write index */
	atomic_store_u32(&sm->external_work_queue.ridx, sm->external_work_queue.widx);
	/* DEBUG_DECL: the lock-held check only compiles in debug builds; release
	 * builds unlock unconditionally */
	DEBUG_DECL(if (sm->locks[lock])) {
		os_shared_memory_region_unlock(&ctx->shared_memory, sm->locks, (i32)lock);
	}

	/* tell any live-imaging observer to stop */
	atomic_or_u32(&sm->live_imaging_dirty_flags, BeamformerLiveImagingDirtyFlags_StopImaging);
}