ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git

static.c (21728B)


      1 /* See LICENSE for license details. */
      2 
      3 /* NOTE(rnp): magic variables to force discrete GPU usage on laptops with multiple devices */
      4 EXPORT i32 NvOptimusEnablement = 1;
      5 EXPORT i32 AmdPowerXpressRequestHighPerformance = 1;
      6 
      7 #ifndef _DEBUG
      8 
      9 #include "beamformer.c"
     10 #define debug_init(...)
     11 
     12 #else
     13 
     14 global void *debug_lib;
     15 
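        /* X-macro list of the entry points that get hot reloaded in debug builds. Each X(name)
         * entry expands below to one global function pointer, e.g.
         * `global beamformer_frame_step_fn *beamformer_frame_step;`, and debug_reload()
         * re-resolves every pointer whenever the library on disk changes. */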
     16 #define DEBUG_ENTRY_POINTS \
     17 	X(beamformer_debug_ui_deinit)  \
     18 	X(beamformer_complete_compute) \
     19 	X(beamformer_frame_step)       \
     20 	X(beamformer_reload_shader)    \
     21 	X(beamformer_rf_upload)
     22 
     23 #define X(name) global name ##_fn *name;
     24 DEBUG_ENTRY_POINTS
     25 #undef X
     26 
     27 struct debug_context {
     28 	BeamformerInput *input;
     29 	b32 *compute_worker_asleep;
     30 	b32 *upload_worker_asleep;
     31 };
     32 
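        /* FILE_WATCH_CALLBACK_FN is defined elsewhere; judging by the call sites in this file
         * the callbacks take (s8 path, iptr user_data, Arena arena) and return a b32. */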
     33 function FILE_WATCH_CALLBACK_FN(debug_reload)
     34 {
     35 	struct debug_context *ctx = (struct debug_context *)user_data;
     36 	Stream err = arena_stream(arena);
     37 
     38 	/* NOTE(rnp): spin until compute thread finishes its work (we will probably
     39 	 * never reload while compute is in progress, but just in case). */
     40 	spin_wait(!atomic_load_u32(ctx->compute_worker_asleep));
     41 	spin_wait(!atomic_load_u32(ctx->upload_worker_asleep));
     42 
     43 	os_unload_library(debug_lib);
     44 	debug_lib = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, &err);
     45 
     46 	#define X(name) name = os_lookup_dynamic_symbol(debug_lib, #name, &err);
     47 	DEBUG_ENTRY_POINTS
     48 	#undef X
     49 
     50 	stream_append_s8(&err, s8("Reloaded Main Executable\n"));
     51 	os_write_file(os_error_handle(), stream_to_s8(&err));
     52 
     53 	ctx->input->executable_reloaded = 1;
     54 
     55 	return 1;
     56 }
     57 
     58 function void
     59 debug_init(BeamformerCtx *ctx, BeamformerInput *input, Arena *arena)
     60 {
     61 	struct debug_context *dctx = push_struct(arena, struct debug_context);
     62 	dctx->input = input;
     63 	dctx->compute_worker_asleep = &ctx->compute_worker.asleep;
     64 	dctx->upload_worker_asleep  = &ctx->upload_worker.asleep;
     65 	os_add_file_watch(s8(OS_DEBUG_LIB_NAME), debug_reload, (iptr)dctx);
     66 	debug_reload(s8(""), (iptr)dctx, *arena);
     67 
     68 	Stream err = arena_stream(*arena);
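        	/* NOTE: 10600 requests RenderDoc API version 1.6.0; os_get_module() presumably only
        	 * finds an already loaded module, so frame capture hooks are only installed when the
        	 * application was launched from inside RenderDoc. */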
     69 	void *rdoc = os_get_module(OS_RENDERDOC_SONAME, 0);
     70 	if (rdoc) {
     71 		renderdoc_get_api_fn *get_api = os_lookup_dynamic_symbol(rdoc, "RENDERDOC_GetAPI", &err);
     72 		if (get_api) {
     73 			RenderDocAPI *api = 0;
     74 			if (get_api(10600, (void **)&api)) {
     75 				ctx->start_frame_capture = RENDERDOC_START_FRAME_CAPTURE(api);
     76 				ctx->end_frame_capture   = RENDERDOC_END_FRAME_CAPTURE(api);
     77 				stream_append_s8(&err, s8("loaded: " OS_RENDERDOC_SONAME "\n"));
     78 			}
     79 		}
     80 	}
     81 
     82 	os_write_file(os_error_handle(), stream_to_s8(&err));
     83 }
     84 
     85 #endif /* _DEBUG */
     86 
     87 function void
     88 gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, const void *userctx)
     89 {
     90 	Stream *e = (Stream *)userctx;
     91 	stream_append_s8s(e, s8("[OpenGL] "), (s8){.len = len, .data = (u8 *)msg}, s8("\n"));
     92 	os_write_file(os_error_handle(), stream_to_s8(e));
     93 	stream_reset(e, 0);
     94 }
     95 
     96 function void
     97 load_gl(Stream *err)
     98 {
     99 	#define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name);
    100 	OGLProcedureList
    101 	#undef X
    102 
    103 	/* NOTE: Gather information about the GPU */
    104 	{
    105 		char *vendor = (char *)glGetString(GL_VENDOR);
    106 		if (!vendor) {
    107 			stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
    108 			os_fatal(stream_to_s8(err));
    109 		}
    110 		/* TODO(rnp): str prefix of */
    111 		switch (vendor[0]) {
    112 		case 'A': gl_parameters.vendor_id = GLVendor_AMD;    break;
    113 		case 'I': gl_parameters.vendor_id = GLVendor_Intel;  break;
    114 		case 'N': gl_parameters.vendor_id = GLVendor_NVIDIA; break;
    115 		/* NOTE(rnp): freedreno */
    116 		case 'f': gl_parameters.vendor_id = GLVendor_ARM;    break;
    117 		/* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
    118 		case 'M': gl_parameters.vendor_id = GLVendor_ARM;    break;
    119 		default:
    120 			stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
    121 			os_fatal(stream_to_s8(err));
    122 		}
    123 
    124 		#define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl_parameters.name);
    125 		GL_PARAMETERS
    126 		#undef X
    127 	}
    128 
    129 #ifdef _DEBUG
    130 	{
    131 		s8 vendor = s8("vendor:");
    132 		i32 max_width = (i32)vendor.len;
    133 		#define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len;
    134 		GL_PARAMETERS
    135 		#undef X
    136 		max_width++;
    137 
    138 		stream_append_s8s(err, s8("---- GL Parameters ----\n"), vendor);
    139 		stream_pad(err, ' ', max_width - (i32)vendor.len);
    140 		switch (gl_parameters.vendor_id) {
    141 		case GLVendor_AMD:    stream_append_s8(err, s8("AMD"));    break;
    142 		case GLVendor_ARM:    stream_append_s8(err, s8("ARM"));    break;
    143 		case GLVendor_Intel:  stream_append_s8(err, s8("Intel"));  break;
    144 		case GLVendor_NVIDIA: stream_append_s8(err, s8("nVidia")); break;
    145 		}
    146 		stream_append_byte(err, '\n');
    147 
    148 		#define X(glname, name, suffix) \
    149 			stream_append_s8(err, s8(#name ":"));                     \
    150 			stream_pad(err, ' ', max_width - (i32)s8(#name ":").len); \
    151 			stream_append_i64(err, gl_parameters.name);               \
    152 			stream_append_s8(err, s8(suffix "\n"));
    153 		GL_PARAMETERS
    154 		#undef X
    155 		stream_append_s8(err, s8("-----------------------\n"));
    156 		os_write_file(os_error_handle(), stream_to_s8(err));
    157 	}
    158 #endif
    159 
    160 	{
    161 		stream_reset(err, 0);
    162 		if (gl_parameters.max_ubo_size < (i32)sizeof(BeamformerParameters)) {
    163 			stream_append_s8(err, s8("GPU must support UBOs of at least "));
    164 			stream_append_i64(err, sizeof(BeamformerParameters));
    165 			stream_append_s8(err, s8(" bytes!\n"));
    166 		}
    167 
    168 		#define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n"));
    169 		OGLProcedureList
    170 		#undef X
    171 
    172 		if (err->widx) os_fatal(stream_to_s8(err));
    173 	}
    174 }
    175 
    176 function FILE_WATCH_CALLBACK_FN(reload_shader)
    177 {
    178 	ShaderReloadContext  *ctx  = (typeof(ctx))user_data;
    179 	BeamformerShaderKind  kind = beamformer_reloadable_shader_kinds[ctx->reloadable_info_index];
    180 	return beamformer_reload_shader(path, ctx, arena, beamformer_shader_names[kind]);
    181 }
    182 
    183 typedef struct {
    184 	BeamformerCtx        *beamformer;
    185 	BeamformerShaderKind  shader;
    186 } BeamformerShaderReloadIndirectContext;
    187 
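        /* A changed shader file is not compiled here; the watch callback only queues a
         * ReloadShader work item and wakes the compute worker, presumably because the compute
         * GL context is current on that thread. */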
    188 function FILE_WATCH_CALLBACK_FN(reload_shader_indirect)
    189 {
    190 	BeamformerShaderReloadIndirectContext *rsi = (typeof(rsi))user_data;
    191 	BeamformerCtx *ctx = rsi->beamformer;
    192 	BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
    193 	if (work) {
    194 		work->kind = BeamformerWorkKind_ReloadShader;
    195 		work->reload_shader = rsi->shader;
    196 		beamform_work_queue_push_commit(ctx->beamform_work_queue);
    197 		os_wake_waiters(&ctx->compute_worker.sync_variable);
    198 	}
    199 	return 1;
    200 }
    201 
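        /* Loads the optional CUDA helper library (NVIDIA only; currently short circuited off,
         * see the TODO below). Any cuda_* procedure left unresolved is pointed at its _stub
         * fallback so callers never have to null check. */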
    202 function FILE_WATCH_CALLBACK_FN(load_cuda_library)
    203 {
    204 	local_persist void *cuda_library_handle;
    205 
    206 	/* TODO(rnp): (25.10.30) registering the rf buffer with CUDA is currently
    207 	 * causing a major performance regression. for now we are disabling its use
    208 	 * altogether. it will be reenabled once the issue can be fixed */
    209 	b32 result = 0 && gl_parameters.vendor_id == GLVendor_NVIDIA && os_file_exists((c8 *)path.data);
    210 	if (result) {
    211 		Stream err = arena_stream(arena);
    212 
    213 		stream_append_s8(&err, s8("loading CUDA library: " OS_CUDA_LIB_NAME "\n"));
    214 		os_unload_library(cuda_library_handle);
    215 		cuda_library_handle = os_load_library((c8 *)path.data, OS_CUDA_LIB_TEMP_NAME, &err);
    216 		#define X(name, symname) cuda_## name = os_lookup_dynamic_symbol(cuda_library_handle, symname, &err);
    217 		CUDALibraryProcedureList
    218 		#undef X
    219 
    220 		os_write_file(os_error_handle(), stream_to_s8(&err));
    221 	}
    222 
    223 	#define X(name, symname) if (!cuda_## name) cuda_## name = cuda_ ## name ## _stub;
    224 	CUDALibraryProcedureList
    225 	#undef X
    226 
    227 	return result;
    228 }
    229 
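        /* Packs one mesh into a single GL buffer laid out as [vertices | normals | indices];
         * both vertex attributes read through binding 0, with the normals located at a
         * relative offset of vertices_size. */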
    230 function BeamformerRenderModel
    231 render_model_from_arrays(f32 *vertices, f32 *normals, i32 vertices_size, u16 *indices, i32 index_count)
    232 {
    233 	BeamformerRenderModel result = {0};
    234 
    235 	i32 buffer_size    = vertices_size * 2 + index_count * (i32)sizeof(u16);
    236 	i32 indices_offset = vertices_size * 2;
    237 	i32 indices_size   = index_count * (i32)sizeof(u16);
    238 
    239 	result.elements        = index_count;
    240 	result.elements_offset = indices_offset;
    241 
    242 	glCreateBuffers(1, &result.buffer);
    243 	glNamedBufferStorage(result.buffer, buffer_size, 0, GL_DYNAMIC_STORAGE_BIT);
    244 	glNamedBufferSubData(result.buffer, 0,              vertices_size, vertices);
    245 	glNamedBufferSubData(result.buffer, vertices_size,  vertices_size, normals);
    246 	glNamedBufferSubData(result.buffer, indices_offset, indices_size,  indices);
    247 
    248 	glCreateVertexArrays(1, &result.vao);
    249 	glVertexArrayVertexBuffer(result.vao, 0, result.buffer, 0,             3 * sizeof(f32));
    250 	glVertexArrayVertexBuffer(result.vao, 1, result.buffer, vertices_size, 3 * sizeof(f32));
    251 	glVertexArrayElementBuffer(result.vao, result.buffer);
    252 
    253 	glEnableVertexArrayAttrib(result.vao, 0);
    254 	glEnableVertexArrayAttrib(result.vao, 1);
    255 
    256 	glVertexArrayAttribFormat(result.vao, 0, 3, GL_FLOAT, 0, 0);
    257 	glVertexArrayAttribFormat(result.vao, 1, 3, GL_FLOAT, 0, (u32)vertices_size);
    258 
    259 	glVertexArrayAttribBinding(result.vao, 0, 0);
    260 	glVertexArrayAttribBinding(result.vao, 1, 0);
    261 
    262 	return result;
    263 }
    264 
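        /* Only these three GLFW calls are needed (to create hidden windows sharing the main GL
         * context for the worker threads), so they are declared by hand here rather than
         * pulling in the GLFW header. */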
    265 #define GLFW_VISIBLE 0x00020004
    266 void glfwWindowHint(i32, i32);
    267 iptr glfwCreateWindow(i32, i32, char *, iptr, iptr);
    268 void glfwMakeContextCurrent(iptr);
    269 
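        /* Futex-style sleep: the worker tries to CAS sync_variable from 0 to 1 and runs if it
         * succeeds (or immediately while live imaging is active); otherwise it flags itself
         * asleep and blocks in os_wait_on_value() until, presumably, a matching
         * os_wake_waiters() call releases it. */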
    270 function void
    271 worker_thread_sleep(GLWorkerThreadContext *ctx, BeamformerSharedMemory *sm)
    272 {
    273 	for (;;) {
    274 		i32 expected = 0;
    275 		if (atomic_cas_u32(&ctx->sync_variable, &expected, 1) ||
    276 		    atomic_load_u32(&sm->live_imaging_parameters.active))
    277 		{
    278 			break;
    279 		}
    280 
    281 		/* TODO(rnp): clean this crap up; we shouldn't need two values to communicate this */
    282 		atomic_store_u32(&ctx->asleep, 1);
    283 		os_wait_on_value(&ctx->sync_variable, 1, (u32)-1);
    284 		atomic_store_u32(&ctx->asleep, 0);
    285 	}
    286 }
    287 
    288 function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point)
    289 {
    290 	GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;
    291 
    292 	glfwMakeContextCurrent(ctx->window_handle);
    293 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
    294 
    295 	BeamformerCtx *beamformer = (BeamformerCtx *)ctx->user_context;
    296 	glCreateQueries(GL_TIME_ELAPSED, countof(beamformer->compute_context.shader_timer_ids),
    297 	                beamformer->compute_context.shader_timer_ids);
    298 
    299 	for (;;) {
    300 		worker_thread_sleep(ctx, beamformer->shared_memory.region);
    301 		asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg);
    302 		beamformer_complete_compute(ctx->user_context, &ctx->arena, ctx->gl_context);
    303 	}
    304 
    305 	unreachable();
    306 
    307 	return 0;
    308 }
    309 
    310 function OS_THREAD_ENTRY_POINT_FN(beamformer_upload_entry_point)
    311 {
    312 	GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;
    313 	glfwMakeContextCurrent(ctx->window_handle);
    314 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
    315 
    316 	BeamformerUploadThreadContext *up = (typeof(up))ctx->user_context;
    317 	glCreateQueries(GL_TIMESTAMP, 1, &up->rf_buffer->data_timestamp_query);
    318 	/* NOTE(rnp): start this here so we don't have to worry about it being started or not */
    319 	glQueryCounter(up->rf_buffer->data_timestamp_query, GL_TIMESTAMP);
    320 
    321 	for (;;) {
    322 		worker_thread_sleep(ctx, up->shared_memory->region);
    323 		beamformer_rf_upload(up);
    324 	}
    325 
    326 	unreachable();
    327 
    328 	return 0;
    329 }
    330 
    331 function void
    332 beamformer_init(BeamformerInput *input)
    333 {
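        	/* Carve fixed-size arenas for the compute worker, upload worker, UI, and error stream
        	 * off the end of the main allocation; whatever is left over (minus a 4K scratch region
        	 * reserved just below) becomes the context's general purpose arena. */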
    334 	Arena  memory        = input->memory;
    335 	Arena  compute_arena = sub_arena_end(&memory, MB(2), KB(4));
    336 	Arena  upload_arena  = sub_arena_end(&memory, KB(4), KB(4));
    337 	Arena  ui_arena      = sub_arena_end(&memory, MB(2), KB(4));
    338 	Stream error         = arena_stream(sub_arena_end(&memory, MB(1), 1));
    339 	BeamformerCtx *ctx   = push_struct(&memory, BeamformerCtx);
    340 
    341 	Arena scratch = {.beg = memory.end - 4096L, .end = memory.end};
    342 	memory.end = scratch.beg;
    343 
    344 	ctx->window_size = (iv2){{1280, 840}};
    345 	ctx->error_stream = error;
    346 	ctx->ui_backing_store = ui_arena;
    347 	input->executable_reloaded = 1;
    348 
    349 	ctx->compute_worker.arena  = compute_arena;
    350 	ctx->compute_worker.asleep = 1;
    351 	ctx->upload_worker.arena   = upload_arena;
    352 	ctx->upload_worker.asleep  = 1;
    353 
    354 	debug_init(ctx, input, &memory);
    355 
    356 	SetConfigFlags(FLAG_VSYNC_HINT|FLAG_WINDOW_ALWAYS_RUN);
    357 	InitWindow(ctx->window_size.w, ctx->window_size.h, "OGL Beamformer");
    358 	/* NOTE: do this after InitWindow so that the window starts out floating in tiling WMs */
    359 	SetWindowState(FLAG_WINDOW_RESIZABLE);
    360 	SetWindowMinSize(840, ctx->window_size.h);
    361 
    362 	glfwWindowHint(GLFW_VISIBLE, 0);
    363 	iptr raylib_window_handle = (iptr)GetPlatformWindowHandle();
    364 
    365 	load_gl(&ctx->error_stream);
    366 
    367 	ctx->beamform_work_queue  = push_struct(&memory, BeamformWorkQueue);
    368 	ctx->compute_shader_stats = push_struct(&memory, ComputeShaderStats);
    369 	ctx->compute_timing_table = push_struct(&memory, ComputeTimingTable);
    370 
    371 	/* TODO(rnp): I'm not sure if it's a good idea to pre-reserve a bunch of semaphores
    372 	 * on w32, but that's what we are doing for now */
    373 	u32 lock_count = (u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots;
    374 	ctx->shared_memory = os_create_shared_memory_area(&memory, OS_SHARED_MEMORY_NAME, lock_count,
    375 	                                                  BEAMFORMER_SHARED_MEMORY_SIZE);
    376 	BeamformerSharedMemory *sm = ctx->shared_memory.region;
    377 	if (!sm) os_fatal(s8("Get more ram lol\n"));
    378 	mem_clear(sm, 0, sizeof(*sm));
    379 
    380 	sm->version = BEAMFORMER_SHARED_MEMORY_VERSION;
    381 	sm->reserved_parameter_blocks = 1;
    382 
    383 	BeamformerComputeContext *cs = &ctx->compute_context;
    384 
    385 	GLWorkerThreadContext *worker = &ctx->compute_worker;
    386 	/* TODO(rnp): we should lock this down after we have something working */
    387 	worker->user_context  = (iptr)ctx;
    388 	worker->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
    389 	worker->handle        = os_create_thread((iptr)worker, compute_worker_thread_entry_point);
    390 	os_set_thread_name(worker->handle, s8("[compute]"));
    391 
    392 	GLWorkerThreadContext         *upload = &ctx->upload_worker;
    393 	BeamformerUploadThreadContext *upctx  = push_struct(&memory, typeof(*upctx));
    394 	upload->user_context = (iptr)upctx;
    395 	upctx->rf_buffer     = &cs->rf_buffer;
    396 	upctx->shared_memory = &ctx->shared_memory;
    397 	upctx->compute_timing_table = ctx->compute_timing_table;
    398 	upctx->compute_worker_sync  = &ctx->compute_worker.sync_variable;
    399 	upload->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
    400 	upload->handle        = os_create_thread((iptr)upload, beamformer_upload_entry_point);
    401 	os_set_thread_name(upload->handle, s8("[upload]"));
    402 
    403 	glfwMakeContextCurrent(raylib_window_handle);
    404 
    405 	if (load_cuda_library(s8(OS_CUDA_LIB_NAME), 0, memory))
    406 		os_add_file_watch(s8(OS_CUDA_LIB_NAME), load_cuda_library, 0);
    407 
    408 	/* NOTE: set up OpenGL debug logging */
    409 	Stream *gl_error_stream = push_struct(&memory, Stream);
    410 	*gl_error_stream        = stream_alloc(&memory, 1024);
    411 	glDebugMessageCallback(gl_debug_logger, gl_error_stream);
    412 #ifdef _DEBUG
    413 	glEnable(GL_DEBUG_OUTPUT);
    414 #endif
    415 
    416 	if (!BakeShaders)
    417 	{
    418 		for EachElement(beamformer_reloadable_compute_shader_info_indices, it) {
    419 			i32   index = beamformer_reloadable_compute_shader_info_indices[it];
    420 			Arena temp  = scratch;
    421 			s8 file = push_s8_from_parts(&temp, s8(OS_PATH_SEPARATOR), s8("shaders"),
    422 			                             beamformer_reloadable_shader_files[index]);
    423 
    424 			BeamformerShaderReloadIndirectContext *rsi = push_struct(&memory, typeof(*rsi));
    425 			rsi->beamformer = ctx;
    426 			rsi->shader     = beamformer_reloadable_shader_kinds[index];
    427 			os_add_file_watch(file, reload_shader_indirect, (iptr)rsi);
    428 			reload_shader_indirect(file, (iptr)rsi, memory);
    429 		}
    430 		os_wake_waiters(&worker->sync_variable);
    431 	}
    432 
    433 	FrameViewRenderContext *fvr = &ctx->frame_view_render_context;
    434 	glCreateFramebuffers(countof(fvr->framebuffers), fvr->framebuffers);
    435 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[0], s8("Frame View Framebuffer"));
    436 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer"));
    437 
    438 	glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers);
    439 	i32 msaa_samples = gl_parameters.vendor_id == GLVendor_ARM? 4 : 8;
    440 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8,
    441 	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
    442 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
    443 	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
    444 
    445 	static_assert(countof(beamformer_reloadable_render_shader_info_indices) == 1,
    446 	              "only a single render shader is currently handled");
    447 	i32 render_rsi_index = beamformer_reloadable_render_shader_info_indices[0];
    448 
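        /* The 3D render program is described by two linked ShaderReloadContexts: the fragment
         * stage pairs the header below with the shader file watched on disk (presumably
         * concatenated by beamformer_reload_shader), while the vertex stage carries its whole
         * source inline in its header and has no reloadable file (index -1). */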
    449 	Arena *arena = BakeShaders? &scratch : &memory;
    450 	ShaderReloadContext *render_3d = push_struct(arena, typeof(*render_3d));
    451 	render_3d->beamformer_context    = ctx;
    452 	render_3d->reloadable_info_index = render_rsi_index;
    453 	render_3d->gl_type = GL_FRAGMENT_SHADER;
    454 	render_3d->header  = s8(""
    455 	"layout(location = 0) in  vec3 normal;\n"
    456 	"layout(location = 1) in  vec3 texture_coordinate;\n\n"
    457 	"layout(location = 2) in  vec3 test_texture_coordinate;\n\n"
    458 	"layout(location = 0) out vec4 out_colour;\n\n"
    459 	"layout(location = " str(FRAME_VIEW_DYNAMIC_RANGE_LOC) ") uniform float u_db_cutoff = 60;\n"
    460 	"layout(location = " str(FRAME_VIEW_THRESHOLD_LOC)     ") uniform float u_threshold = 40;\n"
    461 	"layout(location = " str(FRAME_VIEW_GAMMA_LOC)         ") uniform float u_gamma     = 1;\n"
    462 	"layout(location = " str(FRAME_VIEW_LOG_SCALE_LOC)     ") uniform bool  u_log_scale;\n"
    463 	"layout(location = " str(FRAME_VIEW_BB_COLOUR_LOC)     ") uniform vec4  u_bb_colour   = vec4(" str(FRAME_VIEW_BB_COLOUR) ");\n"
    464 	"layout(location = " str(FRAME_VIEW_BB_FRACTION_LOC)   ") uniform float u_bb_fraction = " str(FRAME_VIEW_BB_FRACTION) ";\n"
    465 	"layout(location = " str(FRAME_VIEW_SOLID_BB_LOC)      ") uniform bool  u_solid_bb;\n"
    466 	"\n"
    467 	"layout(binding = 0) uniform sampler3D u_texture;\n");
    468 
    469 	render_3d->link = push_struct(arena, typeof(*render_3d));
    470 	render_3d->link->reloadable_info_index = -1;
    471 	render_3d->link->gl_type = GL_VERTEX_SHADER;
    472 	render_3d->link->link    = render_3d;
    473 	render_3d->link->header  = s8(""
    474 	"layout(location = 0) in vec3 v_position;\n"
    475 	"layout(location = 1) in vec3 v_normal;\n"
    476 	"\n"
    477 	"layout(location = 0) out vec3 f_normal;\n"
    478 	"layout(location = 1) out vec3 f_texture_coordinate;\n"
    479 	"layout(location = 2) out vec3 f_orig_texture_coordinate;\n"
    480 	"\n"
    481 	"layout(location = " str(FRAME_VIEW_MODEL_MATRIX_LOC)  ") uniform mat4  u_model;\n"
    482 	"layout(location = " str(FRAME_VIEW_VIEW_MATRIX_LOC)   ") uniform mat4  u_view;\n"
    483 	"layout(location = " str(FRAME_VIEW_PROJ_MATRIX_LOC)   ") uniform mat4  u_projection;\n"
    484 	"\n"
    485 	"\n"
    486 	"void main()\n"
    487 	"{\n"
    488 	"\tvec3 pos = v_position;\n"
    489 	"\tf_orig_texture_coordinate = (2 * v_position + 1) / 2;\n"
    490 	//"\tif (v_position.y == -1) pos.x = clamp(v_position.x, -u_clip_fraction, u_clip_fraction);\n"
    491 	"\tvec3 tex_coord = (2 * pos + 1) / 2;\n"
    492 	"\tf_texture_coordinate = tex_coord.xzy;\n"
    493 	//"\tf_texture_coordinate = u_swizzle? tex_coord.xzy : tex_coord;\n"
    494 	//"\tf_normal    = normalize(mat3(u_model) * v_normal);\n"
    495 	"\tf_normal    = v_normal;\n"
    496 	"\tgl_Position = u_projection * u_view * u_model * vec4(pos, 1);\n"
    497 	"}\n");
    498 
    499 	s8 render_file = {0};
    500 	if (!BakeShaders) {
    501 		render_file = push_s8_from_parts(&scratch, s8(OS_PATH_SEPARATOR), s8("shaders"),
    502 		                                 beamformer_reloadable_shader_files[render_rsi_index]);
    503 		os_add_file_watch(render_file, reload_shader, (iptr)render_3d);
    504 	}
    505 	reload_shader(render_file, (iptr)render_3d, memory);
    506 
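        /* Unit cube with per-face normals: every corner is listed three times, once per
         * adjacent face, so each copy can carry that face's normal; the index list picks the
         * copy that matches the face being drawn. */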
    507 	f32 unit_cube_vertices[] = {
    508 		 0.5f,  0.5f, -0.5f,
    509 		 0.5f,  0.5f, -0.5f,
    510 		 0.5f,  0.5f, -0.5f,
    511 		 0.5f, -0.5f, -0.5f,
    512 		 0.5f, -0.5f, -0.5f,
    513 		 0.5f, -0.5f, -0.5f,
    514 		 0.5f,  0.5f,  0.5f,
    515 		 0.5f,  0.5f,  0.5f,
    516 		 0.5f,  0.5f,  0.5f,
    517 		 0.5f, -0.5f,  0.5f,
    518 		 0.5f, -0.5f,  0.5f,
    519 		 0.5f, -0.5f,  0.5f,
    520 		-0.5f,  0.5f, -0.5f,
    521 		-0.5f,  0.5f, -0.5f,
    522 		-0.5f,  0.5f, -0.5f,
    523 		-0.5f, -0.5f, -0.5f,
    524 		-0.5f, -0.5f, -0.5f,
    525 		-0.5f, -0.5f, -0.5f,
    526 		-0.5f,  0.5f,  0.5f,
    527 		-0.5f,  0.5f,  0.5f,
    528 		-0.5f,  0.5f,  0.5f,
    529 		-0.5f, -0.5f,  0.5f,
    530 		-0.5f, -0.5f,  0.5f,
    531 		-0.5f, -0.5f,  0.5f
    532 	};
    533 	f32 unit_cube_normals[] = {
    534 		 0.0f,  0.0f, -1.0f,
    535 		 0.0f,  1.0f,  0.0f,
    536 		 1.0f,  0.0f,  0.0f,
    537 		 0.0f,  0.0f, -1.0f,
    538 		 0.0f, -1.0f,  0.0f,
    539 		 1.0f,  0.0f,  0.0f,
    540 		 0.0f,  0.0f,  1.0f,
    541 		 0.0f,  1.0f,  0.0f,
    542 		 1.0f,  0.0f,  0.0f,
    543 		 0.0f,  0.0f,  1.0f,
    544 		 0.0f, -1.0f,  0.0f,
    545 		 1.0f,  0.0f,  0.0f,
    546 		 0.0f,  0.0f, -1.0f,
    547 		 0.0f,  1.0f,  0.0f,
    548 		-1.0f,  0.0f,  0.0f,
    549 		 0.0f,  0.0f, -1.0f,
    550 		 0.0f, -1.0f,  0.0f,
    551 		-1.0f,  0.0f,  0.0f,
    552 		 0.0f,  0.0f,  1.0f,
    553 		 0.0f,  1.0f,  0.0f,
    554 		-1.0f,  0.0f,  0.0f,
    555 		 0.0f,  0.0f,  1.0f,
    556 		 0.0f, -1.0f,  0.0f,
    557 		-1.0f,  0.0f,  0.0f
    558 	};
    559 	u16 unit_cube_indices[] = {
    560 		1,  13, 19,
    561 		1,  19, 7,
    562 		9,  6,  18,
    563 		9,  18, 21,
    564 		23, 20, 14,
    565 		23, 14, 17,
    566 		16, 4,  10,
    567 		16, 10, 22,
    568 		5,  2,  8,
    569 		5,  8,  11,
    570 		15, 12, 0,
    571 		15, 0,  3
    572 	};
    573 
    574 	cs->unit_cube_model = render_model_from_arrays(unit_cube_vertices, unit_cube_normals,
    575 	                                               sizeof(unit_cube_vertices),
    576 	                                               unit_cube_indices, countof(unit_cube_indices));
    577 
    578 	memory.end = scratch.end;
    579 	ctx->arena = memory;
    580 }
    581 
    582 function void
    583 beamformer_invalidate_shared_memory(Arena memory)
    584 {
    585 	/* NOTE(rnp): work around PEBKAC when the beamformer is closed while we are doing live
    586 	 * imaging. if the Verasonics is blocked in an external function (calling the library
    587 	 * to start compute) it is impossible for us to get it to properly shut down, which
    588 	 * will sometimes result in us needing to power cycle the system. set the shared memory
    589 	 * into an error state and release the dispatch lock so that future calls will error
    590 	 * instead of blocking.
    591 	 */
    592 	BeamformerCtx *ctx = BeamformerContextMemory(memory);
    593 	BeamformerSharedMemory *sm = ctx->shared_memory.region;
    594 	BeamformerSharedMemoryLockKind lock = BeamformerSharedMemoryLockKind_DispatchCompute;
    595 	atomic_store_u32(&sm->invalid, 1);
    596 	atomic_store_u32(&sm->external_work_queue.ridx, sm->external_work_queue.widx);
    597 	DEBUG_DECL(if (sm->locks[lock])) {
    598 		os_shared_memory_region_unlock(&ctx->shared_memory, sm->locks, (i32)lock);
    599 	}
    600 
    601 	atomic_or_u32(&sm->live_imaging_dirty_flags, BeamformerLiveImagingDirtyFlags_StopImaging);
    602 }