ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

static.c (22383B)


      1 /* See LICENSE for license details. */
      2 
/* NOTE(rnp): magic variables to force discrete GPU usage on laptops with multiple devices.
 * Both are exported from the executable with a non-zero value; the first is the symbol
 * checked by NVIDIA (Optimus) drivers and the second the one checked by AMD (PowerXpress)
 * drivers. */
EXPORT i32 NvOptimusEnablement = 1;
EXPORT i32 AmdPowerXpressRequestHighPerformance = 1;
      6 
      7 #ifndef _DEBUG
      8 
      9 #include "beamformer.c"
     10 #define debug_init(...)
     11 
     12 #else
     13 
/* NOTE: handle of the currently loaded debug library; replaced on every debug_reload() */
global void *debug_lib;

/* NOTE: X-macro list of the beamformer entry points that are looked up by name in the
 * debug library. Each entry `name` is paired with a `name##_fn` function type
 * (declared outside of this section). */
#define DEBUG_ENTRY_POINTS \
	X(beamformer_debug_ui_deinit)  \
	X(beamformer_complete_compute) \
	X(beamformer_frame_step)       \
	X(beamformer_reload_shader)    \
	X(beamformer_rf_upload)

/* NOTE: one global function pointer per entry point; filled in by debug_reload() */
#define X(name) global name ##_fn *name;
DEBUG_ENTRY_POINTS
#undef X
     26 
     27 function FILE_WATCH_CALLBACK_FN(debug_reload)
     28 {
     29 	BeamformerInput *input = (BeamformerInput *)user_data;
     30 	Stream err             = arena_stream(arena);
     31 
     32 	/* NOTE(rnp): spin until compute thread finishes its work (we will probably
     33 	 * never reload while compute is in progress but just incase). */
     34 	spin_wait(!atomic_load_u32(&os->compute_worker.asleep));
     35 	spin_wait(!atomic_load_u32(&os->upload_worker.asleep));
     36 
     37 	os_unload_library(debug_lib);
     38 	debug_lib = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, &err);
     39 
     40 	#define X(name) name = os_lookup_dynamic_symbol(debug_lib, #name, &err);
     41 	DEBUG_ENTRY_POINTS
     42 	#undef X
     43 
     44 	stream_append_s8(&err, s8("Reloaded Main Executable\n"));
     45 	os_write_file(os->error_handle, stream_to_s8(&err));
     46 
     47 	input->executable_reloaded = 1;
     48 
     49 	return 1;
     50 }
     51 
     52 function void
     53 debug_init(OS *os, iptr input, Arena *arena)
     54 {
     55 	os_add_file_watch(os, arena, s8(OS_DEBUG_LIB_NAME), debug_reload, input);
     56 	debug_reload(os, s8(""), input, *arena);
     57 
     58 	Stream err = arena_stream(*arena);
     59 	void *rdoc = os_get_module(OS_RENDERDOC_SONAME, 0);
     60 	if (rdoc) {
     61 		renderdoc_get_api_fn *get_api = os_lookup_dynamic_symbol(rdoc, "RENDERDOC_GetAPI", &err);
     62 		if (get_api) {
     63 			RenderDocAPI *api = 0;
     64 			if (get_api(10600, (void **)&api)) {
     65 				os->start_frame_capture = RENDERDOC_START_FRAME_CAPTURE(api);
     66 				os->end_frame_capture   = RENDERDOC_END_FRAME_CAPTURE(api);
     67 				stream_append_s8(&err, s8("loaded: " OS_RENDERDOC_SONAME "\n"));
     68 			}
     69 		}
     70 	}
     71 
     72 	os_write_file(os->error_handle, stream_to_s8(&err));
     73 }
     74 
     75 #endif /* _DEBUG */
     76 
/* User context handed to gl_debug_logger() through glDebugMessageCallback() */
struct gl_debug_ctx {
	/* scratch stream used to format a single message; reset after every message */
	Stream stream;
	/* handle that formatted messages are written to */
	iptr   os_error_handle;
};
     81 
     82 function void
     83 gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, const void *userctx)
     84 {
     85 	struct gl_debug_ctx *ctx = (struct gl_debug_ctx *)userctx;
     86 	Stream *e = &ctx->stream;
     87 	stream_append_s8s(e, s8("[OpenGL] "), (s8){.len = len, .data = (u8 *)msg}, s8("\n"));
     88 	os_write_file(ctx->os_error_handle, stream_to_s8(e));
     89 	stream_reset(e, 0);
     90 }
     91 
     92 function void
     93 get_gl_params(GLParams *gl, Stream *err)
     94 {
     95 	char *vendor = (char *)glGetString(GL_VENDOR);
     96 	if (!vendor) {
     97 		stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
     98 		os_fatal(stream_to_s8(err));
     99 	}
    100 	/* TODO(rnp): str prefix of */
    101 	switch (vendor[0]) {
    102 	case 'A': gl->vendor_id = GL_VENDOR_AMD;    break;
    103 	case 'I': gl->vendor_id = GL_VENDOR_INTEL;  break;
    104 	case 'N': gl->vendor_id = GL_VENDOR_NVIDIA; break;
    105 	/* NOTE(rnp): freedreno */
    106 	case 'f': gl->vendor_id = GL_VENDOR_ARM;    break;
    107 	/* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
    108 	case 'M': gl->vendor_id = GL_VENDOR_ARM;    break;
    109 	default:
    110 		stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
    111 		os_fatal(stream_to_s8(err));
    112 	}
    113 
    114 	#define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl->name);
    115 	GL_PARAMETERS
    116 	#undef X
    117 }
    118 
    119 function void
    120 validate_gl_requirements(GLParams *gl, Arena a)
    121 {
    122 	Stream s = arena_stream(a);
    123 
    124 	if (gl->max_ubo_size < (i32)sizeof(BeamformerParameters)) {
    125 		stream_append_s8(&s, s8("GPU must support UBOs of at least "));
    126 		stream_append_i64(&s, sizeof(BeamformerParameters));
    127 		stream_append_s8(&s, s8(" bytes!\n"));
    128 	}
    129 
    130 	#define X(name, ret, params) if (!name) stream_append_s8s(&s, s8("missing required GL function:"), s8(#name), s8("\n"));
    131 	OGLProcedureList
    132 	#undef X
    133 
    134 	if (s.widx) os_fatal(stream_to_s8(&s));
    135 }
    136 
    137 function void
    138 dump_gl_params(GLParams *gl, Arena a, OS *os)
    139 {
    140 #ifdef _DEBUG
    141 	s8 vendor = s8("vendor:");
    142 	i32 max_width = (i32)vendor.len;
    143 	#define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len;
    144 	GL_PARAMETERS
    145 	#undef X
    146 	max_width++;
    147 
    148 	Stream s = arena_stream(a);
    149 	stream_append_s8s(&s, s8("---- GL Parameters ----\n"), vendor);
    150 	stream_pad(&s, ' ', max_width - (i32)vendor.len);
    151 	switch (gl->vendor_id) {
    152 	case GL_VENDOR_AMD:    stream_append_s8(&s, s8("AMD\n"));    break;
    153 	case GL_VENDOR_ARM:    stream_append_s8(&s, s8("ARM\n"));    break;
    154 	case GL_VENDOR_INTEL:  stream_append_s8(&s, s8("Intel\n"));  break;
    155 	case GL_VENDOR_NVIDIA: stream_append_s8(&s, s8("nVidia\n")); break;
    156 	}
    157 
    158 	#define X(glname, name, suffix) \
    159 		stream_append_s8(&s, s8(#name ":"));                     \
    160 		stream_pad(&s, ' ', max_width - (i32)s8(#name ":").len); \
    161 		stream_append_i64(&s, gl->name);                         \
    162 		stream_append_s8(&s, s8(suffix));                        \
    163 		stream_append_byte(&s, '\n');
    164 	GL_PARAMETERS
    165 	#undef X
    166 	stream_append_s8(&s, s8("-----------------------\n"));
    167 	os_write_file(os->error_handle, stream_to_s8(&s));
    168 #endif
    169 }
    170 
    171 function FILE_WATCH_CALLBACK_FN(reload_shader)
    172 {
    173 	ShaderReloadContext            *ctx = (typeof(ctx))user_data;
    174 	BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + ctx->reloadable_info_index;
    175 	return beamformer_reload_shader(os, path, ctx, arena, beamformer_shader_names[rsi->kind]);
    176 }
    177 
    178 function FILE_WATCH_CALLBACK_FN(reload_shader_indirect)
    179 {
    180 	ShaderReloadContext *src = (typeof(src))user_data;
    181 	BeamformerCtx *ctx = src->beamformer_context;
    182 	BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
    183 	if (work) {
    184 		work->kind = BeamformerWorkKind_ReloadShader,
    185 		work->shader_reload_context = src;
    186 		beamform_work_queue_push_commit(ctx->beamform_work_queue);
    187 		os_wake_waiters(&os->compute_worker.sync_variable);
    188 	}
    189 	return 1;
    190 }
    191 
    192 function FILE_WATCH_CALLBACK_FN(load_cuda_library)
    193 {
    194 	local_persist void *cuda_library_handle;
    195 
    196 	b32 result = os_file_exists((c8 *)path.data);
    197 	if (result) {
    198 		Stream err = arena_stream(arena);
    199 
    200 		stream_append_s8(&err, s8("loading CUDA library: " OS_CUDA_LIB_NAME "\n"));
    201 		os_unload_library(cuda_library_handle);
    202 		cuda_library_handle = os_load_library((c8 *)path.data, OS_CUDA_LIB_TEMP_NAME, &err);
    203 		#define X(name, symname) cuda_## name = os_lookup_dynamic_symbol(cuda_library_handle, symname, &err);
    204 		CUDALibraryProcedureList
    205 		#undef X
    206 
    207 		os_write_file(os->error_handle, stream_to_s8(&err));
    208 	}
    209 
    210 	#define X(name, symname) if (!cuda_## name) cuda_## name = cuda_ ## name ## _stub;
    211 	CUDALibraryProcedureList
    212 	#undef X
    213 
    214 	return result;
    215 }
    216 
    217 function BeamformerRenderModel
    218 render_model_from_arrays(f32 *vertices, f32 *normals, i32 vertices_size, u16 *indices, i32 index_count)
    219 {
    220 	BeamformerRenderModel result = {0};
    221 
    222 	i32 buffer_size    = vertices_size * 2 + index_count * (i32)sizeof(u16);
    223 	i32 indices_offset = vertices_size * 2;
    224 	i32 indices_size   = index_count * (i32)sizeof(u16);
    225 
    226 	result.elements        = index_count;
    227 	result.elements_offset = indices_offset;
    228 
    229 	glCreateBuffers(1, &result.buffer);
    230 	glNamedBufferStorage(result.buffer, buffer_size, 0, GL_DYNAMIC_STORAGE_BIT);
    231 	glNamedBufferSubData(result.buffer, 0,              vertices_size, vertices);
    232 	glNamedBufferSubData(result.buffer, vertices_size,  vertices_size, normals);
    233 	glNamedBufferSubData(result.buffer, indices_offset, indices_size,  indices);
    234 
    235 	glCreateVertexArrays(1, &result.vao);
    236 	glVertexArrayVertexBuffer(result.vao, 0, result.buffer, 0,             3 * sizeof(f32));
    237 	glVertexArrayVertexBuffer(result.vao, 1, result.buffer, vertices_size, 3 * sizeof(f32));
    238 	glVertexArrayElementBuffer(result.vao, result.buffer);
    239 
    240 	glEnableVertexArrayAttrib(result.vao, 0);
    241 	glEnableVertexArrayAttrib(result.vao, 1);
    242 
    243 	glVertexArrayAttribFormat(result.vao, 0, 3, GL_FLOAT, 0, 0);
    244 	glVertexArrayAttribFormat(result.vao, 1, 3, GL_FLOAT, 0, (u32)vertices_size);
    245 
    246 	glVertexArrayAttribBinding(result.vao, 0, 0);
    247 	glVertexArrayAttribBinding(result.vao, 1, 0);
    248 
    249 	return result;
    250 }
    251 
/* NOTE: hand written declarations for the few GLFW symbols used in this file
 * (presumably to avoid pulling in the GLFW header - TODO confirm). Window handles are
 * passed around as iptr. */
#define GLFW_VISIBLE 0x00020004
void glfwWindowHint(i32, i32);
iptr glfwCreateWindow(i32, i32, char *, iptr, iptr);
void glfwMakeContextCurrent(iptr);
    256 
    257 function void
    258 worker_thread_sleep(GLWorkerThreadContext *ctx, BeamformerSharedMemory *sm)
    259 {
    260 	for (;;) {
    261 		i32 expected = 0;
    262 		if (atomic_cas_u32(&ctx->sync_variable, &expected, 1))
    263 			break;
    264 
    265 		if (!atomic_load_u32(&sm->live_imaging_parameters.active)) {
    266 			atomic_store_u32(&ctx->asleep, 1);
    267 			os_wait_on_value(&ctx->sync_variable, 1, (u32)-1);
    268 			atomic_store_u32(&ctx->asleep, 0);
    269 		}
    270 	}
    271 }
    272 
    273 function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point)
    274 {
    275 	GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;
    276 
    277 	glfwMakeContextCurrent(ctx->window_handle);
    278 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
    279 
    280 	BeamformerCtx *beamformer = (BeamformerCtx *)ctx->user_context;
    281 	glCreateQueries(GL_TIME_ELAPSED, countof(beamformer->compute_context.shader_timer_ids),
    282 	                beamformer->compute_context.shader_timer_ids);
    283 
    284 	for (;;) {
    285 		worker_thread_sleep(ctx, beamformer->shared_memory.region);
    286 		asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg);
    287 		beamformer_complete_compute(ctx->user_context, &ctx->arena, ctx->gl_context);
    288 	}
    289 
    290 	unreachable();
    291 
    292 	return 0;
    293 }
    294 
    295 function OS_THREAD_ENTRY_POINT_FN(upload_worker_thread_entry_point)
    296 {
    297 	GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;
    298 	glfwMakeContextCurrent(ctx->window_handle);
    299 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
    300 
    301 	BeamformerUploadThreadContext *up = (typeof(up))ctx->user_context;
    302 	glCreateQueries(GL_TIMESTAMP, 1, &up->rf_buffer->data_timestamp_query);
    303 	/* NOTE(rnp): start this here so we don't have to worry about it being started or not */
    304 	glQueryCounter(up->rf_buffer->data_timestamp_query, GL_TIMESTAMP);
    305 
    306 	for (;;) {
    307 		worker_thread_sleep(ctx, up->shared_memory->region);
    308 		asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg);
    309 		beamformer_rf_upload(up, ctx->arena);
    310 	}
    311 
    312 	unreachable();
    313 
    314 	return 0;
    315 }
    316 
    317 function void
    318 setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input)
    319 {
    320 	Arena  compute_arena = sub_arena(memory, MB(2),  KB(4));
    321 	Arena  upload_arena  = sub_arena(memory, KB(64), KB(4));
    322 	Stream error         = stream_alloc(memory, MB(1));
    323 	Arena  ui_arena      = sub_arena(memory, MB(2), KB(4));
    324 
    325 	Arena scratch = {.beg = memory->end - 4096L, .end = memory->end};
    326 	memory->end = scratch.beg;
    327 
    328 	BeamformerCtx   *ctx   = *o_ctx   = push_struct(memory, typeof(*ctx));
    329 	BeamformerInput *input = *o_input = push_struct(memory, typeof(*input));
    330 
    331 	ctx->window_size = (iv2){{1280, 840}};
    332 	ctx->error_stream = error;
    333 	ctx->ui_backing_store = ui_arena;
    334 	input->executable_reloaded = 1;
    335 
    336 	os_init(&ctx->os, memory);
    337 	ctx->os.path_separator        = s8(OS_PATH_SEPARATOR);
    338 	ctx->os.compute_worker.arena  = compute_arena;
    339 	ctx->os.compute_worker.asleep = 1;
    340 	ctx->os.upload_worker.arena   = upload_arena;
    341 	ctx->os.upload_worker.asleep  = 1;
    342 
    343 	debug_init(&ctx->os, (iptr)input, memory);
    344 
    345 	SetConfigFlags(FLAG_VSYNC_HINT|FLAG_WINDOW_ALWAYS_RUN);
    346 	InitWindow(ctx->window_size.w, ctx->window_size.h, "OGL Beamformer");
    347 	/* NOTE: do this after initing so that the window starts out floating in tiling wm */
    348 	SetWindowState(FLAG_WINDOW_RESIZABLE);
    349 	SetWindowMinSize(840, ctx->window_size.h);
    350 
    351 	glfwWindowHint(GLFW_VISIBLE, 0);
    352 	iptr raylib_window_handle = (iptr)GetPlatformWindowHandle();
    353 
    354 	#define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name);
    355 	OGLProcedureList
    356 	#undef X
    357 	/* NOTE: Gather information about the GPU */
    358 	get_gl_params(&ctx->gl, &ctx->error_stream);
    359 	dump_gl_params(&ctx->gl, *memory, &ctx->os);
    360 	validate_gl_requirements(&ctx->gl, *memory);
    361 
    362 	ctx->beamform_work_queue  = push_struct(memory, BeamformWorkQueue);
    363 	ctx->compute_shader_stats = push_struct(memory, ComputeShaderStats);
    364 	ctx->compute_timing_table = push_struct(memory, ComputeTimingTable);
    365 
    366 	/* TODO(rnp): I'm not sure if its a good idea to pre-reserve a bunch of semaphores
    367 	 * on w32 but thats what we are doing for now */
    368 	u32 lock_count = (u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots;
    369 	ctx->shared_memory = os_create_shared_memory_area(memory, OS_SHARED_MEMORY_NAME, lock_count,
    370 	                                                  BEAMFORMER_SHARED_MEMORY_SIZE);
    371 	BeamformerSharedMemory *sm = ctx->shared_memory.region;
    372 	if (!sm) os_fatal(s8("Get more ram lol\n"));
    373 	mem_clear(sm, 0, sizeof(*sm));
    374 
    375 	sm->version = BEAMFORMER_SHARED_MEMORY_VERSION;
    376 	sm->reserved_parameter_blocks = 1;
    377 
    378 	BeamformerComputeContext *cs = &ctx->compute_context;
    379 
    380 	GLWorkerThreadContext *worker = &ctx->os.compute_worker;
    381 	/* TODO(rnp): we should lock this down after we have something working */
    382 	worker->user_context  = (iptr)ctx;
    383 	worker->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
    384 	worker->handle        = os_create_thread(*memory, (iptr)worker, s8("[compute]"),
    385 	                                         compute_worker_thread_entry_point);
    386 
    387 	GLWorkerThreadContext         *upload = &ctx->os.upload_worker;
    388 	BeamformerUploadThreadContext *upctx  = push_struct(memory, typeof(*upctx));
    389 	upload->user_context = (iptr)upctx;
    390 	upctx->rf_buffer     = &cs->rf_buffer;
    391 	upctx->shared_memory = &ctx->shared_memory;
    392 	upctx->compute_timing_table = ctx->compute_timing_table;
    393 	upctx->compute_worker_sync  = &ctx->os.compute_worker.sync_variable;
    394 	upload->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
    395 	upload->handle        = os_create_thread(*memory, (iptr)upload, s8("[upload]"),
    396 	                                         upload_worker_thread_entry_point);
    397 
    398 	glfwMakeContextCurrent(raylib_window_handle);
    399 
    400 	#define X(name, ...) cuda_## name = cuda_## name ##_stub;
    401 	CUDALibraryProcedureList
    402 	#undef X
    403 	if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA
    404 	    && load_cuda_library(&ctx->os, s8(OS_CUDA_LIB_NAME), 0, *memory))
    405 	{
    406 		os_add_file_watch(&ctx->os, memory, s8(OS_CUDA_LIB_NAME), load_cuda_library, 0);
    407 	}
    408 
    409 	/* NOTE: set up OpenGL debug logging */
    410 	struct gl_debug_ctx *gl_debug_ctx = push_struct(memory, typeof(*gl_debug_ctx));
    411 	gl_debug_ctx->stream          = stream_alloc(memory, 1024);
    412 	gl_debug_ctx->os_error_handle = ctx->os.error_handle;
    413 	glDebugMessageCallback(gl_debug_logger, gl_debug_ctx);
    414 #ifdef _DEBUG
    415 	glEnable(GL_DEBUG_OUTPUT);
    416 #endif
    417 
    418 	read_only local_persist s8 compute_headers[BeamformerShaderKind_ComputeCount] = {
    419 		/* X(name, type, gltype) */
    420 		#define X(name, t, gltype) "\t" #gltype " " #name ";\n"
    421 		[BeamformerShaderKind_DAS] = s8_comp("layout(std140, binding = 0) uniform parameters {\n"
    422 			BEAMFORMER_DAS_UBO_PARAM_LIST
    423 			"};\n\n"
    424 		),
    425 		[BeamformerShaderKind_Decode] = s8_comp("layout(std140, binding = 0) uniform parameters {\n"
    426 			BEAMFORMER_DECODE_UBO_PARAM_LIST
    427 			"};\n\n"
    428 		),
    429 		[BeamformerShaderKind_Filter] = s8_comp("layout(std140, binding = 0) uniform parameters {\n"
    430 			BEAMFORMER_FILTER_UBO_PARAM_LIST
    431 			"};\n\n"
    432 		),
    433 		#undef X
    434 	};
    435 
    436 	for EachElement(beamformer_reloadable_compute_shader_info_indices, it) {
    437 		i32   index = beamformer_reloadable_compute_shader_info_indices[it];
    438 		Arena temp  = scratch;
    439 
    440 		s8 file = push_s8_from_parts(&temp, s8(OS_PATH_SEPARATOR), s8("shaders"),
    441 		                             beamformer_reloadable_shader_files[index]);
    442 
    443 		BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + index;
    444 		ShaderReloadContext *src = push_struct(memory, typeof(*src));
    445 		src->beamformer_context    = ctx;
    446 		src->reloadable_info_index = index;
    447 		src->link    = src;
    448 		src->header  = compute_headers[rsi->kind];
    449 		src->gl_type = GL_COMPUTE_SHADER;
    450 		os_add_file_watch(&ctx->os, memory, file, reload_shader_indirect, (iptr)src);
    451 		reload_shader_indirect(&ctx->os, file, (iptr)src, *memory);
    452 	}
    453 	os_wake_waiters(&worker->sync_variable);
    454 
    455 	FrameViewRenderContext *fvr = &ctx->frame_view_render_context;
    456 	glCreateFramebuffers(countof(fvr->framebuffers), fvr->framebuffers);
    457 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[0], s8("Frame View Framebuffer"));
    458 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer"));
    459 
    460 	glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers);
    461 	i32 msaa_samples = ctx->gl.vendor_id == GL_VENDOR_ARM? 4 : 8;
    462 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8,
    463 	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
    464 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
    465 	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
    466 
    467 	static_assert(countof(beamformer_reloadable_render_shader_info_indices) == 1,
    468 	              "only a single render shader is currently handled");
    469 	i32 render_rsi_index = beamformer_reloadable_render_shader_info_indices[0];
    470 
    471 	s8 render_file = push_s8_from_parts(&scratch, s8(OS_PATH_SEPARATOR), s8("shaders"),
    472 	                                    beamformer_reloadable_shader_files[render_rsi_index]);
    473 	ShaderReloadContext *render_3d = push_struct(memory, typeof(*render_3d));
    474 	render_3d->beamformer_context    = ctx;
    475 	render_3d->reloadable_info_index = render_rsi_index;
    476 	render_3d->gl_type = GL_FRAGMENT_SHADER;
    477 	render_3d->header  = s8(""
    478 	"layout(location = 0) in  vec3 normal;\n"
    479 	"layout(location = 1) in  vec3 texture_coordinate;\n\n"
    480 	"layout(location = 2) in  vec3 test_texture_coordinate;\n\n"
    481 	"layout(location = 0) out vec4 out_colour;\n\n"
    482 	"layout(location = " str(FRAME_VIEW_DYNAMIC_RANGE_LOC) ") uniform float u_db_cutoff = 60;\n"
    483 	"layout(location = " str(FRAME_VIEW_THRESHOLD_LOC)     ") uniform float u_threshold = 40;\n"
    484 	"layout(location = " str(FRAME_VIEW_GAMMA_LOC)         ") uniform float u_gamma     = 1;\n"
    485 	"layout(location = " str(FRAME_VIEW_LOG_SCALE_LOC)     ") uniform bool  u_log_scale;\n"
    486 	"layout(location = " str(FRAME_VIEW_BB_COLOUR_LOC)     ") uniform vec4  u_bb_colour   = vec4(" str(FRAME_VIEW_BB_COLOUR) ");\n"
    487 	"layout(location = " str(FRAME_VIEW_BB_FRACTION_LOC)   ") uniform float u_bb_fraction = " str(FRAME_VIEW_BB_FRACTION) ";\n"
    488 	"layout(location = " str(FRAME_VIEW_SOLID_BB_LOC)      ") uniform bool  u_solid_bb;\n"
    489 	"\n"
    490 	"layout(binding = 0) uniform sampler3D u_texture;\n");
    491 
    492 	render_3d->link = push_struct(memory, typeof(*render_3d));
    493 	render_3d->link->reloadable_info_index = -1;
    494 	render_3d->link->gl_type = GL_VERTEX_SHADER;
    495 	render_3d->link->link    = render_3d;
    496 	render_3d->link->header  = s8(""
    497 	"layout(location = 0) in vec3 v_position;\n"
    498 	"layout(location = 1) in vec3 v_normal;\n"
    499 	"\n"
    500 	"layout(location = 0) out vec3 f_normal;\n"
    501 	"layout(location = 1) out vec3 f_texture_coordinate;\n"
    502 	"layout(location = 2) out vec3 f_orig_texture_coordinate;\n"
    503 	"\n"
    504 	"layout(location = " str(FRAME_VIEW_MODEL_MATRIX_LOC)  ") uniform mat4  u_model;\n"
    505 	"layout(location = " str(FRAME_VIEW_VIEW_MATRIX_LOC)   ") uniform mat4  u_view;\n"
    506 	"layout(location = " str(FRAME_VIEW_PROJ_MATRIX_LOC)   ") uniform mat4  u_projection;\n"
    507 	"\n"
    508 	"\n"
    509 	"void main()\n"
    510 	"{\n"
    511 	"\tvec3 pos = v_position;\n"
    512 	"\tf_orig_texture_coordinate = (2 * v_position + 1) / 2;\n"
    513 	//"\tif (v_position.y == -1) pos.x = clamp(v_position.x, -u_clip_fraction, u_clip_fraction);\n"
    514 	"\tvec3 tex_coord = (2 * pos + 1) / 2;\n"
    515 	"\tf_texture_coordinate = tex_coord.xzy;\n"
    516 	//"\tf_texture_coordinate = u_swizzle? tex_coord.xzy : tex_coord;\n"
    517 	//"\tf_normal    = normalize(mat3(u_model) * v_normal);\n"
    518 	"\tf_normal    = v_normal;\n"
    519 	"\tgl_Position = u_projection * u_view * u_model * vec4(pos, 1);\n"
    520 	"}\n");
    521 	reload_shader(&ctx->os, render_file, (iptr)render_3d, *memory);
    522 	os_add_file_watch(&ctx->os, memory, render_file, reload_shader, (iptr)render_3d);
    523 
    524 	f32 unit_cube_vertices[] = {
    525 		 0.5f,  0.5f, -0.5f,
    526 		 0.5f,  0.5f, -0.5f,
    527 		 0.5f,  0.5f, -0.5f,
    528 		 0.5f, -0.5f, -0.5f,
    529 		 0.5f, -0.5f, -0.5f,
    530 		 0.5f, -0.5f, -0.5f,
    531 		 0.5f,  0.5f,  0.5f,
    532 		 0.5f,  0.5f,  0.5f,
    533 		 0.5f,  0.5f,  0.5f,
    534 		 0.5f, -0.5f,  0.5f,
    535 		 0.5f, -0.5f,  0.5f,
    536 		 0.5f, -0.5f,  0.5f,
    537 		-0.5f,  0.5f, -0.5f,
    538 		-0.5f,  0.5f, -0.5f,
    539 		-0.5f,  0.5f, -0.5f,
    540 		-0.5f, -0.5f, -0.5f,
    541 		-0.5f, -0.5f, -0.5f,
    542 		-0.5f, -0.5f, -0.5f,
    543 		-0.5f,  0.5f,  0.5f,
    544 		-0.5f,  0.5f,  0.5f,
    545 		-0.5f,  0.5f,  0.5f,
    546 		-0.5f, -0.5f,  0.5f,
    547 		-0.5f, -0.5f,  0.5f,
    548 		-0.5f, -0.5f,  0.5f
    549 	};
    550 	f32 unit_cube_normals[] = {
    551 		 0.0f,  0.0f, -1.0f,
    552 		 0.0f,  1.0f,  0.0f,
    553 		 1.0f,  0.0f,  0.0f,
    554 		 0.0f,  0.0f, -1.0f,
    555 		 0.0f, -1.0f,  0.0f,
    556 		 1.0f,  0.0f,  0.0f,
    557 		 0.0f,  0.0f,  1.0f,
    558 		 0.0f,  1.0f,  0.0f,
    559 		 1.0f,  0.0f,  0.0f,
    560 		 0.0f,  0.0f,  1.0f,
    561 		 0.0f, -1.0f,  0.0f,
    562 		 1.0f,  0.0f,  0.0f,
    563 		 0.0f,  0.0f, -1.0f,
    564 		 0.0f,  1.0f,  0.0f,
    565 		-1.0f,  0.0f,  0.0f,
    566 		 0.0f,  0.0f, -1.0f,
    567 		 0.0f, -1.0f,  0.0f,
    568 		-1.0f,  0.0f,  0.0f,
    569 		 0.0f,  0.0f,  1.0f,
    570 		 0.0f,  1.0f,  0.0f,
    571 		-1.0f,  0.0f,  0.0f,
    572 		 0.0f,  0.0f,  1.0f,
    573 		 0.0f, -1.0f,  0.0f,
    574 		-1.0f,  0.0f,  0.0f
    575 	};
    576 	u16 unit_cube_indices[] = {
    577 		1,  13, 19,
    578 		1,  19, 7,
    579 		9,  6,  18,
    580 		9,  18, 21,
    581 		23, 20, 14,
    582 		23, 14, 17,
    583 		16, 4,  10,
    584 		16, 10, 22,
    585 		5,  2,  8,
    586 		5,  8,  11,
    587 		15, 12, 0,
    588 		15, 0,  3
    589 	};
    590 
    591 	cs->unit_cube_model = render_model_from_arrays(unit_cube_vertices, unit_cube_normals,
    592 	                                               sizeof(unit_cube_vertices),
    593 	                                               unit_cube_indices, countof(unit_cube_indices));
    594 
    595 	memory->end = scratch.end;
    596 }
    597 
    598 function void
    599 beamformer_invalidate_shared_memory(BeamformerCtx *ctx)
    600 {
    601 	/* NOTE(rnp): work around pebkac when the beamformer is closed while we are doing live
    602 	 * imaging. if the verasonics is blocked in an external function (calling the library
    603 	 * to start compute) it is impossible for us to get it to properly shut down which
    604 	 * will sometimes result in us needing to power cycle the system. set the shared memory
    605 	 * into an error state and release dispatch lock so that future calls will error instead
    606 	 * of blocking.
    607 	 */
    608 	BeamformerSharedMemory *sm = ctx->shared_memory.region;
    609 	BeamformerSharedMemoryLockKind lock = BeamformerSharedMemoryLockKind_DispatchCompute;
    610 	atomic_store_u32(&sm->invalid, 1);
    611 	atomic_store_u32(&sm->external_work_queue.ridx, sm->external_work_queue.widx);
    612 	DEBUG_DECL(if (sm->locks[lock])) {
    613 		os_shared_memory_region_unlock(&ctx->shared_memory, sm->locks, (i32)lock);
    614 	}
    615 
    616 	atomic_or_u32(&sm->live_imaging_dirty_flags, BeamformerLiveImagingDirtyFlags_StopImaging);
    617 }