ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

static.c (21833B)


      1 /* See LICENSE for license details. */
      2 
      3 /* NOTE(rnp): magic variables to force discrete GPU usage on laptops with multiple devices */
      4 EXPORT i32 NvOptimusEnablement = 1;
      5 EXPORT i32 AmdPowerXpressRequestHighPerformance = 1;
      6 
      7 #ifndef _DEBUG
      8 
      9 #include "beamformer.c"
     10 #define debug_init(...)
     11 
     12 #else
     13 
     14 global void *debug_lib;
     15 
     16 #define DEBUG_ENTRY_POINTS \
     17 	X(beamformer_debug_ui_deinit)  \
     18 	X(beamformer_complete_compute) \
     19 	X(beamformer_frame_step)       \
     20 	X(beamformer_reload_shader)    \
     21 	X(beamformer_rf_upload)
     22 
     23 #define X(name) global name ##_fn *name;
     24 DEBUG_ENTRY_POINTS
     25 #undef X
     26 
     27 function FILE_WATCH_CALLBACK_FN(debug_reload)
     28 {
     29 	BeamformerInput *input = (BeamformerInput *)user_data;
     30 	Stream err             = arena_stream(arena);
     31 
     32 	/* NOTE(rnp): spin until compute thread finishes its work (we will probably
     33 	 * never reload while compute is in progress but just incase). */
     34 	spin_wait(!atomic_load_u32(&os->compute_worker.asleep));
     35 	spin_wait(!atomic_load_u32(&os->upload_worker.asleep));
     36 
     37 	os_unload_library(debug_lib);
     38 	debug_lib = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, &err);
     39 
     40 	#define X(name) name = os_lookup_dynamic_symbol(debug_lib, #name, &err);
     41 	DEBUG_ENTRY_POINTS
     42 	#undef X
     43 
     44 	stream_append_s8(&err, s8("Reloaded Main Executable\n"));
     45 	os_write_file(os->error_handle, stream_to_s8(&err));
     46 
     47 	input->executable_reloaded = 1;
     48 
     49 	return 1;
     50 }
     51 
     52 function void
     53 debug_init(OS *os, iptr input, Arena *arena)
     54 {
     55 	os_add_file_watch(os, arena, s8(OS_DEBUG_LIB_NAME), debug_reload, input);
     56 	debug_reload(os, s8(""), input, *arena);
     57 
     58 	Stream err = arena_stream(*arena);
     59 	void *rdoc = os_get_module(OS_RENDERDOC_SONAME, 0);
     60 	if (rdoc) {
     61 		renderdoc_get_api_fn *get_api = os_lookup_dynamic_symbol(rdoc, "RENDERDOC_GetAPI", &err);
     62 		if (get_api) {
     63 			RenderDocAPI *api = 0;
     64 			if (get_api(10600, (void **)&api)) {
     65 				os->start_frame_capture = RENDERDOC_START_FRAME_CAPTURE(api);
     66 				os->end_frame_capture   = RENDERDOC_END_FRAME_CAPTURE(api);
     67 				stream_append_s8(&err, s8("loaded: " OS_RENDERDOC_SONAME "\n"));
     68 			}
     69 		}
     70 	}
     71 
     72 	os_write_file(os->error_handle, stream_to_s8(&err));
     73 }
     74 
     75 #endif /* _DEBUG */
     76 
     77 #define static_path_join(a, b) (a OS_PATH_SEPARATOR b)
     78 
     79 struct gl_debug_ctx {
     80 	Stream stream;
     81 	iptr   os_error_handle;
     82 };
     83 
     84 function void
     85 gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, const void *userctx)
     86 {
     87 	struct gl_debug_ctx *ctx = (struct gl_debug_ctx *)userctx;
     88 	Stream *e = &ctx->stream;
     89 	stream_append_s8s(e, s8("[OpenGL] "), (s8){.len = len, .data = (u8 *)msg}, s8("\n"));
     90 	os_write_file(ctx->os_error_handle, stream_to_s8(e));
     91 	stream_reset(e, 0);
     92 }
     93 
     94 function void
     95 get_gl_params(GLParams *gl, Stream *err)
     96 {
     97 	char *vendor = (char *)glGetString(GL_VENDOR);
     98 	if (!vendor) {
     99 		stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
    100 		os_fatal(stream_to_s8(err));
    101 	}
    102 	/* TODO(rnp): str prefix of */
    103 	switch (vendor[0]) {
    104 	case 'A': gl->vendor_id = GL_VENDOR_AMD;    break;
    105 	case 'I': gl->vendor_id = GL_VENDOR_INTEL;  break;
    106 	case 'N': gl->vendor_id = GL_VENDOR_NVIDIA; break;
    107 	/* NOTE(rnp): freedreno */
    108 	case 'f': gl->vendor_id = GL_VENDOR_ARM;    break;
    109 	/* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
    110 	case 'M': gl->vendor_id = GL_VENDOR_ARM;    break;
    111 	default:
    112 		stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
    113 		os_fatal(stream_to_s8(err));
    114 	}
    115 
    116 	#define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl->name);
    117 	GL_PARAMETERS
    118 	#undef X
    119 }
    120 
    121 function void
    122 validate_gl_requirements(GLParams *gl, Arena a)
    123 {
    124 	Stream s = arena_stream(a);
    125 
    126 	if (gl->max_ubo_size < (i32)sizeof(BeamformerParameters)) {
    127 		stream_append_s8(&s, s8("GPU must support UBOs of at least "));
    128 		stream_append_i64(&s, sizeof(BeamformerParameters));
    129 		stream_append_s8(&s, s8(" bytes!\n"));
    130 	}
    131 
    132 	#define X(name, ret, params) if (!name) stream_append_s8s(&s, s8("missing required GL function:"), s8(#name), s8("\n"));
    133 	OGLProcedureList
    134 	#undef X
    135 
    136 	if (s.widx) os_fatal(stream_to_s8(&s));
    137 }
    138 
    139 function void
    140 dump_gl_params(GLParams *gl, Arena a, OS *os)
    141 {
    142 #ifdef _DEBUG
    143 	s8 vendor = s8("vendor:");
    144 	i32 max_width = (i32)vendor.len;
    145 	#define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len;
    146 	GL_PARAMETERS
    147 	#undef X
    148 	max_width++;
    149 
    150 	Stream s = arena_stream(a);
    151 	stream_append_s8s(&s, s8("---- GL Parameters ----\n"), vendor);
    152 	stream_pad(&s, ' ', max_width - (i32)vendor.len);
    153 	switch (gl->vendor_id) {
    154 	case GL_VENDOR_AMD:    stream_append_s8(&s, s8("AMD\n"));    break;
    155 	case GL_VENDOR_ARM:    stream_append_s8(&s, s8("ARM\n"));    break;
    156 	case GL_VENDOR_INTEL:  stream_append_s8(&s, s8("Intel\n"));  break;
    157 	case GL_VENDOR_NVIDIA: stream_append_s8(&s, s8("nVidia\n")); break;
    158 	}
    159 
    160 	#define X(glname, name, suffix) \
    161 		stream_append_s8(&s, s8(#name ":"));                     \
    162 		stream_pad(&s, ' ', max_width - (i32)s8(#name ":").len); \
    163 		stream_append_i64(&s, gl->name);                         \
    164 		stream_append_s8(&s, s8(suffix));                        \
    165 		stream_append_byte(&s, '\n');
    166 	GL_PARAMETERS
    167 	#undef X
    168 	stream_append_s8(&s, s8("-----------------------\n"));
    169 	os_write_file(os->error_handle, stream_to_s8(&s));
    170 #endif
    171 }
    172 
    173 function FILE_WATCH_CALLBACK_FN(reload_shader)
    174 {
    175 	ShaderReloadContext *ctx = (typeof(ctx))user_data;
    176 	return beamformer_reload_shader(os, ctx->beamformer_context, ctx, arena, ctx->name);
    177 }
    178 
    179 function FILE_WATCH_CALLBACK_FN(reload_shader_indirect)
    180 {
    181 	ShaderReloadContext *src = (typeof(src))user_data;
    182 	BeamformerCtx *ctx = src->beamformer_context;
    183 	BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
    184 	if (work) {
    185 		work->kind = BeamformerWorkKind_ReloadShader,
    186 		work->shader_reload_context = src;
    187 		beamform_work_queue_push_commit(ctx->beamform_work_queue);
    188 		os_wake_waiters(&os->compute_worker.sync_variable);
    189 	}
    190 	return 1;
    191 }
    192 
    193 function FILE_WATCH_CALLBACK_FN(load_cuda_library)
    194 {
    195 	local_persist void *cuda_library_handle;
    196 
    197 	b32 result = os_file_exists((c8 *)path.data);
    198 	if (result) {
    199 		Stream err = arena_stream(arena);
    200 
    201 		stream_append_s8(&err, s8("loading CUDA library: " OS_CUDA_LIB_NAME "\n"));
    202 		os_unload_library(cuda_library_handle);
    203 		cuda_library_handle = os_load_library((c8 *)path.data, OS_CUDA_LIB_TEMP_NAME, &err);
    204 		#define X(name, symname) cuda_## name = os_lookup_dynamic_symbol(cuda_library_handle, symname, &err);
    205 		CUDALibraryProcedureList
    206 		#undef X
    207 
    208 		os_write_file(os->error_handle, stream_to_s8(&err));
    209 	}
    210 
    211 	#define X(name, symname) if (!cuda_## name) cuda_## name = cuda_ ## name ## _stub;
    212 	CUDALibraryProcedureList
    213 	#undef X
    214 
    215 	return result;
    216 }
    217 
    218 function BeamformerRenderModel
    219 render_model_from_arrays(f32 *vertices, f32 *normals, i32 vertices_size, u16 *indices, i32 index_count)
    220 {
    221 	BeamformerRenderModel result = {0};
    222 
    223 	i32 buffer_size    = vertices_size * 2 + index_count * (i32)sizeof(u16);
    224 	i32 indices_offset = vertices_size * 2;
    225 	i32 indices_size   = index_count * (i32)sizeof(u16);
    226 
    227 	result.elements        = index_count;
    228 	result.elements_offset = indices_offset;
    229 
    230 	glCreateBuffers(1, &result.buffer);
    231 	glNamedBufferStorage(result.buffer, buffer_size, 0, GL_DYNAMIC_STORAGE_BIT);
    232 	glNamedBufferSubData(result.buffer, 0,              vertices_size, vertices);
    233 	glNamedBufferSubData(result.buffer, vertices_size,  vertices_size, normals);
    234 	glNamedBufferSubData(result.buffer, indices_offset, indices_size,  indices);
    235 
    236 	glCreateVertexArrays(1, &result.vao);
    237 	glVertexArrayVertexBuffer(result.vao, 0, result.buffer, 0,             3 * sizeof(f32));
    238 	glVertexArrayVertexBuffer(result.vao, 1, result.buffer, vertices_size, 3 * sizeof(f32));
    239 	glVertexArrayElementBuffer(result.vao, result.buffer);
    240 
    241 	glEnableVertexArrayAttrib(result.vao, 0);
    242 	glEnableVertexArrayAttrib(result.vao, 1);
    243 
    244 	glVertexArrayAttribFormat(result.vao, 0, 3, GL_FLOAT, 0, 0);
    245 	glVertexArrayAttribFormat(result.vao, 1, 3, GL_FLOAT, 0, (u32)vertices_size);
    246 
    247 	glVertexArrayAttribBinding(result.vao, 0, 0);
    248 	glVertexArrayAttribBinding(result.vao, 1, 0);
    249 
    250 	return result;
    251 }
    252 
    253 #define GLFW_VISIBLE 0x00020004
    254 void glfwWindowHint(i32, i32);
    255 iptr glfwCreateWindow(i32, i32, char *, iptr, iptr);
    256 void glfwMakeContextCurrent(iptr);
    257 
    258 function void
    259 worker_thread_sleep(GLWorkerThreadContext *ctx)
    260 {
    261 	for (;;) {
    262 		i32 expected = 0;
    263 		if (atomic_cas_u32(&ctx->sync_variable, &expected, 1))
    264 			break;
    265 
    266 		atomic_store_u32(&ctx->asleep, 1);
    267 		os_wait_on_value(&ctx->sync_variable, 1, (u32)-1);
    268 		atomic_store_u32(&ctx->asleep, 0);
    269 	}
    270 }
    271 
    272 function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point)
    273 {
    274 	GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;
    275 
    276 	glfwMakeContextCurrent(ctx->window_handle);
    277 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
    278 
    279 	BeamformerCtx *beamformer = (BeamformerCtx *)ctx->user_context;
    280 	glCreateQueries(GL_TIME_ELAPSED, countof(beamformer->compute_context.shader_timer_ids),
    281 	                beamformer->compute_context.shader_timer_ids);
    282 
    283 	for (;;) {
    284 		worker_thread_sleep(ctx);
    285 		asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg);
    286 		beamformer_complete_compute(ctx->user_context, &ctx->arena, ctx->gl_context);
    287 	}
    288 
    289 	unreachable();
    290 
    291 	return 0;
    292 }
    293 
    294 function OS_THREAD_ENTRY_POINT_FN(upload_worker_thread_entry_point)
    295 {
    296 	GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;
    297 	glfwMakeContextCurrent(ctx->window_handle);
    298 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
    299 
    300 	BeamformerUploadThreadContext *up = (typeof(up))ctx->user_context;
    301 	glCreateQueries(GL_TIMESTAMP, 1, &up->rf_buffer->data_timestamp_query);
    302 	/* NOTE(rnp): start this here so we don't have to worry about it being started or not */
    303 	glQueryCounter(up->rf_buffer->data_timestamp_query, GL_TIMESTAMP);
    304 
    305 	for (;;) {
    306 		worker_thread_sleep(ctx);
    307 		asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg);
    308 		beamformer_rf_upload(up, ctx->arena);
    309 	}
    310 
    311 	unreachable();
    312 
    313 	return 0;
    314 }
    315 
    316 function void
    317 setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input)
    318 {
    319 	Arena  compute_arena = sub_arena(memory, MB(2),  KB(4));
    320 	Arena  upload_arena  = sub_arena(memory, KB(64), KB(4));
    321 	Stream error         = stream_alloc(memory, MB(1));
    322 	Arena  ui_arena      = sub_arena(memory, MB(2), KB(4));
    323 
    324 	BeamformerCtx   *ctx   = *o_ctx   = push_struct(memory, typeof(*ctx));
    325 	BeamformerInput *input = *o_input = push_struct(memory, typeof(*input));
    326 
    327 	ctx->window_size = (iv2){{1280, 840}};
    328 	ctx->error_stream = error;
    329 	ctx->ui_backing_store = ui_arena;
    330 	input->executable_reloaded = 1;
    331 
    332 	os_init(&ctx->os, memory);
    333 	ctx->os.compute_worker.arena  = compute_arena;
    334 	ctx->os.compute_worker.asleep = 1;
    335 	ctx->os.upload_worker.arena   = upload_arena;
    336 	ctx->os.upload_worker.asleep  = 1;
    337 
    338 	debug_init(&ctx->os, (iptr)input, memory);
    339 
    340 	SetConfigFlags(FLAG_VSYNC_HINT|FLAG_WINDOW_ALWAYS_RUN);
    341 	InitWindow(ctx->window_size.w, ctx->window_size.h, "OGL Beamformer");
    342 	/* NOTE: do this after initing so that the window starts out floating in tiling wm */
    343 	SetWindowState(FLAG_WINDOW_RESIZABLE);
    344 	SetWindowMinSize(840, ctx->window_size.h);
    345 
    346 	glfwWindowHint(GLFW_VISIBLE, 0);
    347 	iptr raylib_window_handle = (iptr)GetPlatformWindowHandle();
    348 
    349 	#define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name);
    350 	OGLProcedureList
    351 	#undef X
    352 	/* NOTE: Gather information about the GPU */
    353 	get_gl_params(&ctx->gl, &ctx->error_stream);
    354 	dump_gl_params(&ctx->gl, *memory, &ctx->os);
    355 	validate_gl_requirements(&ctx->gl, *memory);
    356 
    357 	ctx->beamform_work_queue  = push_struct(memory, BeamformWorkQueue);
    358 	ctx->compute_shader_stats = push_struct(memory, ComputeShaderStats);
    359 	ctx->compute_timing_table = push_struct(memory, ComputeTimingTable);
    360 
    361 	/* TODO(rnp): I'm not sure if its a good idea to pre-reserve a bunch of semaphores
    362 	 * on w32 but thats what we are doing for now */
    363 	u32 lock_count = BeamformerSharedMemoryLockKind_Count + BeamformerMaxParameterBlockSlots;
    364 	ctx->shared_memory = os_create_shared_memory_area(memory, OS_SHARED_MEMORY_NAME, lock_count,
    365 	                                                  BEAMFORMER_SHARED_MEMORY_SIZE);
    366 	BeamformerSharedMemory *sm = ctx->shared_memory.region;
    367 	if (!sm) os_fatal(s8("Get more ram lol\n"));
    368 	mem_clear(sm, 0, sizeof(*sm));
    369 
    370 	sm->version = BEAMFORMER_SHARED_MEMORY_VERSION;
    371 	sm->reserved_parameter_blocks = 1;
    372 
    373 	BeamformerComputeContext *cs = &ctx->compute_context;
    374 
    375 	GLWorkerThreadContext *worker = &ctx->os.compute_worker;
    376 	/* TODO(rnp): we should lock this down after we have something working */
    377 	worker->user_context  = (iptr)ctx;
    378 	worker->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
    379 	worker->handle        = os_create_thread(*memory, (iptr)worker, s8("[compute]"),
    380 	                                         compute_worker_thread_entry_point);
    381 
    382 	GLWorkerThreadContext         *upload = &ctx->os.upload_worker;
    383 	BeamformerUploadThreadContext *upctx  = push_struct(memory, typeof(*upctx));
    384 	upload->user_context = (iptr)upctx;
    385 	upctx->rf_buffer     = &cs->rf_buffer;
    386 	upctx->shared_memory = &ctx->shared_memory;
    387 	upctx->compute_timing_table = ctx->compute_timing_table;
    388 	upctx->compute_worker_sync  = &ctx->os.compute_worker.sync_variable;
    389 	upload->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
    390 	upload->handle        = os_create_thread(*memory, (iptr)upload, s8("[upload]"),
    391 	                                         upload_worker_thread_entry_point);
    392 
    393 	glfwMakeContextCurrent(raylib_window_handle);
    394 
    395 	#define X(name, ...) cuda_## name = cuda_## name ##_stub;
    396 	CUDALibraryProcedureList
    397 	#undef X
    398 	if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA
    399 	    && load_cuda_library(&ctx->os, s8(OS_CUDA_LIB_NAME), 0, *memory))
    400 	{
    401 		os_add_file_watch(&ctx->os, memory, s8(OS_CUDA_LIB_NAME), load_cuda_library, 0);
    402 	}
    403 
    404 	/* NOTE: set up OpenGL debug logging */
    405 	struct gl_debug_ctx *gl_debug_ctx = push_struct(memory, typeof(*gl_debug_ctx));
    406 	gl_debug_ctx->stream          = stream_alloc(memory, 1024);
    407 	gl_debug_ctx->os_error_handle = ctx->os.error_handle;
    408 	glDebugMessageCallback(gl_debug_logger, gl_debug_ctx);
    409 #ifdef _DEBUG
    410 	glEnable(GL_DEBUG_OUTPUT);
    411 #endif
    412 
    413 	read_only local_persist s8 compute_headers[BeamformerShaderKind_ComputeCount] = {
    414 		/* X(name, type, gltype) */
    415 		#define X(name, t, gltype) "\t" #gltype " " #name ";\n"
    416 		[BeamformerShaderKind_DAS] = s8_comp("layout(std140, binding = 0) uniform parameters {\n"
    417 			BEAMFORMER_DAS_UBO_PARAM_LIST
    418 			"};\n\n"
    419 		),
    420 		[BeamformerShaderKind_Decode] = s8_comp("layout(std140, binding = 0) uniform parameters {\n"
    421 			BEAMFORMER_DECODE_UBO_PARAM_LIST
    422 			"};\n\n"
    423 		),
    424 		[BeamformerShaderKind_Filter] = s8_comp("layout(std140, binding = 0) uniform parameters {\n"
    425 			BEAMFORMER_FILTER_UBO_PARAM_LIST
    426 			"};\n\n"
    427 		),
    428 		#undef X
    429 	};
    430 
    431 	#define X(e, f, ...) do if (s8(f).len > 0) { \
    432 		ShaderReloadContext *src = push_struct(memory, typeof(*src)); \
    433 		src->beamformer_context  = ctx;                               \
    434 		src->header  = compute_headers[BeamformerShaderKind_##e];     \
    435 		src->path    = s8(static_path_join("shaders", f ".glsl"));    \
    436 		src->name    = src->path;                                     \
    437 		src->shader  = cs->programs + BeamformerShaderKind_##e;       \
    438 		src->gl_type = GL_COMPUTE_SHADER;                             \
    439 		src->kind    = BeamformerShaderKind_##e;                      \
    440 		src->link    = src;                                           \
    441 		os_add_file_watch(&ctx->os, memory, src->path, reload_shader_indirect, (iptr)src); \
    442 		reload_shader_indirect(&ctx->os, src->path, (iptr)src, *memory); \
    443 	} while (0);
    444 	COMPUTE_SHADERS_INTERNAL
    445 	#undef X
    446 	os_wake_waiters(&worker->sync_variable);
    447 
    448 	FrameViewRenderContext *fvr = &ctx->frame_view_render_context;
    449 	glCreateFramebuffers(countof(fvr->framebuffers), fvr->framebuffers);
    450 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[0], s8("Frame View Framebuffer"));
    451 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer"));
    452 
    453 	glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers);
    454 	i32 msaa_samples = ctx->gl.vendor_id == GL_VENDOR_ARM? 4 : 8;
    455 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8,
    456 	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
    457 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
    458 	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
    459 
    460 	ShaderReloadContext *render_3d = push_struct(memory, typeof(*render_3d));
    461 	render_3d->beamformer_context = ctx;
    462 	render_3d->path    = s8(static_path_join("shaders", "render_3d.frag.glsl"));
    463 	render_3d->name    = s8("shaders/render_3d.glsl");
    464 	render_3d->gl_type = GL_FRAGMENT_SHADER;
    465 	render_3d->kind    = BeamformerShaderKind_Render3D;
    466 	render_3d->shader  = &fvr->shader;
    467 	render_3d->header  = s8(""
    468 	"layout(location = 0) in  vec3 normal;\n"
    469 	"layout(location = 1) in  vec3 texture_coordinate;\n\n"
    470 	"layout(location = 2) in  vec3 test_texture_coordinate;\n\n"
    471 	"layout(location = 0) out vec4 out_colour;\n\n"
    472 	"layout(location = " str(FRAME_VIEW_DYNAMIC_RANGE_LOC) ") uniform float u_db_cutoff = 60;\n"
    473 	"layout(location = " str(FRAME_VIEW_THRESHOLD_LOC)     ") uniform float u_threshold = 40;\n"
    474 	"layout(location = " str(FRAME_VIEW_GAMMA_LOC)         ") uniform float u_gamma     = 1;\n"
    475 	"layout(location = " str(FRAME_VIEW_LOG_SCALE_LOC)     ") uniform bool  u_log_scale;\n"
    476 	"layout(location = " str(FRAME_VIEW_BB_COLOUR_LOC)     ") uniform vec4  u_bb_colour   = vec4(" str(FRAME_VIEW_BB_COLOUR) ");\n"
    477 	"layout(location = " str(FRAME_VIEW_BB_FRACTION_LOC)   ") uniform float u_bb_fraction = " str(FRAME_VIEW_BB_FRACTION) ";\n"
    478 	"layout(location = " str(FRAME_VIEW_SOLID_BB_LOC)      ") uniform bool  u_solid_bb;\n"
    479 	"\n"
    480 	"layout(binding = 0) uniform sampler3D u_texture;\n");
    481 
    482 	render_3d->link = push_struct(memory, typeof(*render_3d));
    483 	render_3d->link->gl_type = GL_VERTEX_SHADER;
    484 	render_3d->link->link    = render_3d;
    485 	render_3d->link->header  = s8(""
    486 	"layout(location = 0) in vec3 v_position;\n"
    487 	"layout(location = 1) in vec3 v_normal;\n"
    488 	"\n"
    489 	"layout(location = 0) out vec3 f_normal;\n"
    490 	"layout(location = 1) out vec3 f_texture_coordinate;\n"
    491 	"layout(location = 2) out vec3 f_orig_texture_coordinate;\n"
    492 	"\n"
    493 	"layout(location = " str(FRAME_VIEW_MODEL_MATRIX_LOC)  ") uniform mat4  u_model;\n"
    494 	"layout(location = " str(FRAME_VIEW_VIEW_MATRIX_LOC)   ") uniform mat4  u_view;\n"
    495 	"layout(location = " str(FRAME_VIEW_PROJ_MATRIX_LOC)   ") uniform mat4  u_projection;\n"
    496 	"\n"
    497 	"\n"
    498 	"void main()\n"
    499 	"{\n"
    500 	"\tvec3 pos = v_position;\n"
    501 	"\tf_orig_texture_coordinate = (2 * v_position + 1) / 2;\n"
    502 	//"\tif (v_position.y == -1) pos.x = clamp(v_position.x, -u_clip_fraction, u_clip_fraction);\n"
    503 	"\tvec3 tex_coord = (2 * pos + 1) / 2;\n"
    504 	"\tf_texture_coordinate = tex_coord.xzy;\n"
    505 	//"\tf_texture_coordinate = u_swizzle? tex_coord.xzy : tex_coord;\n"
    506 	//"\tf_normal    = normalize(mat3(u_model) * v_normal);\n"
    507 	"\tf_normal    = v_normal;\n"
    508 	"\tgl_Position = u_projection * u_view * u_model * vec4(pos, 1);\n"
    509 	"}\n");
    510 	reload_shader(&ctx->os, render_3d->path, (iptr)render_3d, *memory);
    511 	os_add_file_watch(&ctx->os, memory, render_3d->path, reload_shader, (iptr)render_3d);
    512 
    513 	f32 unit_cube_vertices[] = {
    514 		 0.5f,  0.5f, -0.5f,
    515 		 0.5f,  0.5f, -0.5f,
    516 		 0.5f,  0.5f, -0.5f,
    517 		 0.5f, -0.5f, -0.5f,
    518 		 0.5f, -0.5f, -0.5f,
    519 		 0.5f, -0.5f, -0.5f,
    520 		 0.5f,  0.5f,  0.5f,
    521 		 0.5f,  0.5f,  0.5f,
    522 		 0.5f,  0.5f,  0.5f,
    523 		 0.5f, -0.5f,  0.5f,
    524 		 0.5f, -0.5f,  0.5f,
    525 		 0.5f, -0.5f,  0.5f,
    526 		-0.5f,  0.5f, -0.5f,
    527 		-0.5f,  0.5f, -0.5f,
    528 		-0.5f,  0.5f, -0.5f,
    529 		-0.5f, -0.5f, -0.5f,
    530 		-0.5f, -0.5f, -0.5f,
    531 		-0.5f, -0.5f, -0.5f,
    532 		-0.5f,  0.5f,  0.5f,
    533 		-0.5f,  0.5f,  0.5f,
    534 		-0.5f,  0.5f,  0.5f,
    535 		-0.5f, -0.5f,  0.5f,
    536 		-0.5f, -0.5f,  0.5f,
    537 		-0.5f, -0.5f,  0.5f
    538 	};
    539 	f32 unit_cube_normals[] = {
    540 		 0.0f,  0.0f, -1.0f,
    541 		 0.0f,  1.0f,  0.0f,
    542 		 1.0f,  0.0f,  0.0f,
    543 		 0.0f,  0.0f, -1.0f,
    544 		 0.0f, -1.0f,  0.0f,
    545 		 1.0f,  0.0f,  0.0f,
    546 		 0.0f,  0.0f,  1.0f,
    547 		 0.0f,  1.0f,  0.0f,
    548 		 1.0f,  0.0f,  0.0f,
    549 		 0.0f,  0.0f,  1.0f,
    550 		 0.0f, -1.0f,  0.0f,
    551 		 1.0f,  0.0f,  0.0f,
    552 		 0.0f,  0.0f, -1.0f,
    553 		 0.0f,  1.0f,  0.0f,
    554 		-1.0f,  0.0f,  0.0f,
    555 		 0.0f,  0.0f, -1.0f,
    556 		 0.0f, -1.0f,  0.0f,
    557 		-1.0f,  0.0f,  0.0f,
    558 		 0.0f,  0.0f,  1.0f,
    559 		 0.0f,  1.0f,  0.0f,
    560 		-1.0f,  0.0f,  0.0f,
    561 		 0.0f,  0.0f,  1.0f,
    562 		 0.0f, -1.0f,  0.0f,
    563 		-1.0f,  0.0f,  0.0f
    564 	};
    565 	u16 unit_cube_indices[] = {
    566 		1,  13, 19,
    567 		1,  19, 7,
    568 		9,  6,  18,
    569 		9,  18, 21,
    570 		23, 20, 14,
    571 		23, 14, 17,
    572 		16, 4,  10,
    573 		16, 10, 22,
    574 		5,  2,  8,
    575 		5,  8,  11,
    576 		15, 12, 0,
    577 		15, 0,  3
    578 	};
    579 
    580 	cs->unit_cube_model = render_model_from_arrays(unit_cube_vertices, unit_cube_normals,
    581 	                                               sizeof(unit_cube_vertices),
    582 	                                               unit_cube_indices, countof(unit_cube_indices));
    583 
    584 	/* stfu gcc this is used */
    585 	DEBUG_DECL((void)BeamformerParameterBlockRegionOffsets;)
    586 }
    587 
    588 function void
    589 beamformer_invalidate_shared_memory(BeamformerCtx *ctx)
    590 {
    591 	/* NOTE(rnp): work around pebkac when the beamformer is closed while we are doing live
    592 	 * imaging. if the verasonics is blocked in an external function (calling the library
    593 	 * to start compute) it is impossible for us to get it to properly shut down which
    594 	 * will sometimes result in us needing to power cycle the system. set the shared memory
    595 	 * into an error state and release dispatch lock so that future calls will error instead
    596 	 * of blocking.
    597 	 */
    598 	BeamformerSharedMemory *sm = ctx->shared_memory.region;
    599 	BeamformerSharedMemoryLockKind lock = BeamformerSharedMemoryLockKind_DispatchCompute;
    600 	atomic_store_u32(&sm->invalid, 1);
    601 	atomic_store_u32(&sm->external_work_queue.ridx, sm->external_work_queue.widx);
    602 	DEBUG_DECL(if (sm->locks[lock])) {
    603 		os_shared_memory_region_unlock(&ctx->shared_memory, sm->locks, (i32)lock);
    604 	}
    605 
    606 	atomic_or_u32(&sm->live_imaging_dirty_flags, BeamformerLiveImagingDirtyFlags_StopImaging);
    607 }