ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

static.c (14823B)


      1 /* See LICENSE for license details. */
      2 #ifndef _DEBUG
      3 
      4 #include "beamformer.c"
      5 #define debug_init(...)
      6 
      7 #else
      8 
      9 global void *debug_lib;
     10 
     11 #define DEBUG_ENTRY_POINTS \
     12 	X(beamformer_frame_step)           \
     13 	X(beamformer_complete_compute)     \
     14 	X(beamformer_compute_setup)        \
     15 	X(beamformer_reload_shader)        \
     16 	X(beamform_work_queue_push)        \
     17 	X(beamform_work_queue_push_commit)
     18 
     19 #define X(name) global name ##_fn *name;
     20 DEBUG_ENTRY_POINTS
     21 #undef X
     22 
     23 function FILE_WATCH_CALLBACK_FN(debug_reload)
     24 {
     25 	BeamformerInput *input = (BeamformerInput *)user_data;
     26 	Stream err             = arena_stream(arena);
     27 
     28 	/* NOTE(rnp): spin until compute thread finishes its work (we will probably
     29 	 * never reload while compute is in progress but just incase). */
     30 	while (!atomic_load_u32(&os->compute_worker.asleep));
     31 
     32 	os_unload_library(debug_lib);
     33 	debug_lib = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, &err);
     34 
     35 	#define X(name) name = os_lookup_dynamic_symbol(debug_lib, #name, &err);
     36 	DEBUG_ENTRY_POINTS
     37 	#undef X
     38 
     39 	stream_append_s8(&err, s8("Reloaded Main Executable\n"));
     40 	os_write_file(os->error_handle, stream_to_s8(&err));
     41 
     42 	input->executable_reloaded = 1;
     43 
     44 	return 1;
     45 }
     46 
     47 function void
     48 debug_init(OS *os, iptr input, Arena *arena)
     49 {
     50 	os_add_file_watch(os, arena, s8(OS_DEBUG_LIB_NAME), debug_reload, input);
     51 	debug_reload(os, s8(""), input, *arena);
     52 
     53 	Stream err = arena_stream(*arena);
     54 	void *rdoc = os_get_module(OS_RENDERDOC_SONAME, 0);
     55 	if (rdoc) {
     56 		renderdoc_get_api_fn *get_api = os_lookup_dynamic_symbol(rdoc, "RENDERDOC_GetAPI", &err);
     57 		if (get_api) {
     58 			RenderDocAPI *api = 0;
     59 			if (get_api(10600, (void **)&api)) {
     60 				os->start_frame_capture = RENDERDOC_START_FRAME_CAPTURE(api);
     61 				os->end_frame_capture   = RENDERDOC_END_FRAME_CAPTURE(api);
     62 				stream_append_s8(&err, s8("loaded: " OS_RENDERDOC_SONAME "\n"));
     63 			}
     64 		}
     65 	}
     66 
     67 	os_write_file(os->error_handle, stream_to_s8(&err));
     68 }
     69 
     70 #endif /* _DEBUG */
     71 
     72 #define static_path_join(a, b) (a OS_PATH_SEPARATOR b)
     73 
     74 struct gl_debug_ctx {
     75 	Stream stream;
     76 	iptr   os_error_handle;
     77 };
     78 
     79 function void
     80 gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, const void *userctx)
     81 {
     82 	(void)src; (void)type; (void)id;
     83 
     84 	struct gl_debug_ctx *ctx = (struct gl_debug_ctx *)userctx;
     85 	Stream *e = &ctx->stream;
     86 	stream_append_s8s(e, s8("[OpenGL] "), (s8){.len = len, .data = (u8 *)msg}, s8("\n"));
     87 	os_write_file(ctx->os_error_handle, stream_to_s8(e));
     88 	stream_reset(e, 0);
     89 }
     90 
     91 function void
     92 get_gl_params(GLParams *gl, Stream *err)
     93 {
     94 	char *vendor = (char *)glGetString(GL_VENDOR);
     95 	if (!vendor) {
     96 		stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
     97 		os_fatal(stream_to_s8(err));
     98 	}
     99 	/* TODO(rnp): str prefix of */
    100 	switch (vendor[0]) {
    101 	case 'A': gl->vendor_id = GL_VENDOR_AMD;    break;
    102 	case 'I': gl->vendor_id = GL_VENDOR_INTEL;  break;
    103 	case 'N': gl->vendor_id = GL_VENDOR_NVIDIA; break;
    104 	/* NOTE(rnp): freedreno */
    105 	case 'f': gl->vendor_id = GL_VENDOR_ARM;    break;
    106 	/* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
    107 	case 'M': gl->vendor_id = GL_VENDOR_ARM;    break;
    108 	default:
    109 		stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
    110 		os_fatal(stream_to_s8(err));
    111 	}
    112 
    113 	#define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl->name);
    114 	GL_PARAMETERS
    115 	#undef X
    116 }
    117 
    118 function void
    119 validate_gl_requirements(GLParams *gl, Arena a)
    120 {
    121 	Stream s = arena_stream(a);
    122 
    123 	if (gl->max_ubo_size < sizeof(BeamformerParameters)) {
    124 		stream_append_s8(&s, s8("GPU must support UBOs of at least "));
    125 		stream_append_i64(&s, sizeof(BeamformerParameters));
    126 		stream_append_s8(&s, s8(" bytes!\n"));
    127 	}
    128 
    129 	#define X(name, ret, params) if (!name) stream_append_s8s(&s, s8("missing required GL function:"), s8(#name), s8("\n"));
    130 	OGLProcedureList
    131 	#undef X
    132 
    133 	if (s.widx) os_fatal(stream_to_s8(&s));
    134 }
    135 
    136 function void
    137 dump_gl_params(GLParams *gl, Arena a, OS *os)
    138 {
    139 	(void)gl; (void)a;
    140 #ifdef _DEBUG
    141 	s8 vendor = s8("vendor:");
    142 	iz max_width = vendor.len;
    143 	#define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = s8(#name ":").len;
    144 	GL_PARAMETERS
    145 	#undef X
    146 	max_width++;
    147 
    148 	Stream s = arena_stream(a);
    149 	stream_append_s8s(&s, s8("---- GL Parameters ----\n"), vendor);
    150 	stream_pad(&s, ' ', max_width - vendor.len);
    151 	switch (gl->vendor_id) {
    152 	case GL_VENDOR_AMD:    stream_append_s8(&s, s8("AMD\n"));    break;
    153 	case GL_VENDOR_ARM:    stream_append_s8(&s, s8("ARM\n"));    break;
    154 	case GL_VENDOR_INTEL:  stream_append_s8(&s, s8("Intel\n"));  break;
    155 	case GL_VENDOR_NVIDIA: stream_append_s8(&s, s8("nVidia\n")); break;
    156 	}
    157 
    158 	#define X(glname, name, suffix) \
    159 		stream_append_s8(&s, s8(#name ":"));                \
    160 		stream_pad(&s, ' ', max_width - s8(#name ":").len); \
    161 		stream_append_i64(&s, gl->name);                    \
    162 		stream_append_s8(&s, s8(suffix));                   \
    163 		stream_append_byte(&s, '\n');
    164 	GL_PARAMETERS
    165 	#undef X
    166 	stream_append_s8(&s, s8("-----------------------\n"));
    167 	os_write_file(os->error_handle, stream_to_s8(&s));
    168 #endif
    169 }
    170 
    171 function FILE_WATCH_CALLBACK_FN(reload_shader)
    172 {
    173 	ShaderReloadContext *ctx = (typeof(ctx))user_data;
    174 	return beamformer_reload_shader(ctx->beamformer_context, ctx, arena, ctx->name);
    175 }
    176 
    177 function FILE_WATCH_CALLBACK_FN(reload_shader_indirect)
    178 {
    179 	ShaderReloadContext *src = (typeof(src))user_data;
    180 	BeamformerCtx *ctx = src->beamformer_context;
    181 	BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
    182 	if (work) {
    183 		work->kind = BeamformerWorkKind_ReloadShader,
    184 		work->shader_reload_context = src;
    185 		beamform_work_queue_push_commit(ctx->beamform_work_queue);
    186 		os_wake_waiters(&os->compute_worker.sync_variable);
    187 	}
    188 	return 1;
    189 }
    190 
    191 function FILE_WATCH_CALLBACK_FN(load_cuda_lib)
    192 {
    193 	CudaLib *cl = (CudaLib *)user_data;
    194 	b32 result  = os_file_exists((c8 *)path.data);
    195 	if (result) {
    196 		Stream err = arena_stream(arena);
    197 
    198 		stream_append_s8(&err, s8("loading CUDA lib: " OS_CUDA_LIB_NAME "\n"));
    199 		os_unload_library(cl->lib);
    200 		cl->lib = os_load_library((c8 *)path.data, OS_CUDA_LIB_TEMP_NAME, &err);
    201 		#define X(name, symname) cl->name = os_lookup_dynamic_symbol(cl->lib, symname, &err);
    202 		CUDA_LIB_FNS
    203 		#undef X
    204 
    205 		os_write_file(os->error_handle, stream_to_s8(&err));
    206 	}
    207 
    208 	#define X(name, symname) if (!cl->name) cl->name = cuda_ ## name ## _stub;
    209 	CUDA_LIB_FNS
    210 	#undef X
    211 
    212 	return result;
    213 }
    214 
    215 #define GLFW_VISIBLE 0x00020004
    216 void glfwWindowHint(i32, i32);
    217 iptr glfwCreateWindow(i32, i32, char *, iptr, iptr);
    218 void glfwMakeContextCurrent(iptr);
    219 
    220 function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point)
    221 {
    222 	GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;
    223 
    224 	glfwMakeContextCurrent(ctx->window_handle);
    225 	ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
    226 
    227 	beamformer_compute_setup(ctx->user_context, ctx->arena, ctx->gl_context);
    228 
    229 	for (;;) {
    230 		for (;;) {
    231 			i32 expected = 0;
    232 			if (atomic_cas_u32(&ctx->sync_variable, &expected, 1))
    233 				break;
    234 
    235 			atomic_store_u32(&ctx->asleep, 1);
    236 			os_wait_on_value(&ctx->sync_variable, 1, -1);
    237 			atomic_store_u32(&ctx->asleep, 0);
    238 		}
    239 		beamformer_complete_compute(ctx->user_context, ctx->arena, ctx->gl_context);
    240 	}
    241 
    242 	unreachable();
    243 
    244 	return 0;
    245 }
    246 
    247 function void
    248 setup_beamformer(BeamformerCtx *ctx, BeamformerInput *input, Arena *memory)
    249 {
    250 	debug_init(&ctx->os, (iptr)input, memory);
    251 
    252 	ctx->window_size  = (uv2){.w = 1280, .h = 840};
    253 
    254 	SetConfigFlags(FLAG_VSYNC_HINT|FLAG_WINDOW_ALWAYS_RUN);
    255 	InitWindow(ctx->window_size.w, ctx->window_size.h, "OGL Beamformer");
    256 	/* NOTE: do this after initing so that the window starts out floating in tiling wm */
    257 	SetWindowState(FLAG_WINDOW_RESIZABLE);
    258 	SetWindowMinSize(840, ctx->window_size.h);
    259 
    260 	glfwWindowHint(GLFW_VISIBLE, 0);
    261 	iptr raylib_window_handle = (iptr)GetPlatformWindowHandle();
    262 
    263 	#define X(name, ret, params) name = (name##_fn *)os_gl_proc_address(#name);
    264 	OGLProcedureList
    265 	#undef X
    266 	/* NOTE: Gather information about the GPU */
    267 	get_gl_params(&ctx->gl, &ctx->error_stream);
    268 	dump_gl_params(&ctx->gl, *memory, &ctx->os);
    269 	validate_gl_requirements(&ctx->gl, *memory);
    270 
    271 	GLWorkerThreadContext *worker = &ctx->os.compute_worker;
    272 	worker->window_handle = glfwCreateWindow(320, 240, "", 0, raylib_window_handle);
    273 	worker->handle        = os_create_thread(*memory, (iptr)worker, s8("[compute]"),
    274 	                                         compute_worker_thread_entry_point);
    275 	/* TODO(rnp): we should lock this down after we have something working */
    276 	worker->user_context  = (iptr)ctx;
    277 
    278 	glfwMakeContextCurrent(raylib_window_handle);
    279 
    280 	ctx->latest_frame         = ctx->beamform_frames;
    281 	ctx->beamform_work_queue  = push_struct(memory, BeamformWorkQueue);
    282 	ctx->compute_shader_stats = push_struct(memory, ComputeShaderStats);
    283 	ctx->compute_timing_table = push_struct(memory, ComputeTimingTable);
    284 
    285 	ctx->shared_memory = os_create_shared_memory_area(memory, OS_SHARED_MEMORY_NAME,
    286 	                                                  BeamformerSharedMemoryLockKind_Count,
    287 	                                                  BEAMFORMER_SHARED_MEMORY_SIZE);
    288 	BeamformerSharedMemory *sm = ctx->shared_memory.region;
    289 	if (!sm) os_fatal(s8("Get more ram lol\n"));
    290 	mem_clear(sm, 0, sizeof(*sm));
    291 
    292 	sm->version = BEAMFORMER_SHARED_MEMORY_VERSION;
    293 
    294 	/* NOTE: default compute shader pipeline */
    295 	sm->compute_stages[0]    = BeamformerShaderKind_Decode;
    296 	sm->compute_stages[1]    = BeamformerShaderKind_DASCompute;
    297 	sm->compute_stages_count = 2;
    298 
    299 	if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA
    300 	    && load_cuda_lib(&ctx->os, s8(OS_CUDA_LIB_NAME), (iptr)&ctx->cuda_lib, *memory))
    301 	{
    302 		os_add_file_watch(&ctx->os, memory, s8(OS_CUDA_LIB_NAME), load_cuda_lib,
    303 		                  (iptr)&ctx->cuda_lib);
    304 	} else {
    305 		#define X(name, symname) if (!ctx->cuda_lib.name) ctx->cuda_lib.name = cuda_ ## name ## _stub;
    306 		CUDA_LIB_FNS
    307 		#undef X
    308 	}
    309 
    310 	/* NOTE: set up OpenGL debug logging */
    311 	struct gl_debug_ctx *gl_debug_ctx = push_struct(memory, typeof(*gl_debug_ctx));
    312 	gl_debug_ctx->stream          = stream_alloc(memory, 1024);
    313 	gl_debug_ctx->os_error_handle = ctx->os.error_handle;
    314 	glDebugMessageCallback(gl_debug_logger, gl_debug_ctx);
    315 #ifdef _DEBUG
    316 	glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
    317 #endif
    318 
    319 	#define X(name, type, size, gltype, glsize, comment) "\t" #gltype " " #name #glsize "; " comment "\n"
    320 	read_only local_persist s8 compute_parameters_header = s8_comp(""
    321 		"layout(std140, binding = 0) uniform parameters {\n"
    322 		BEAMFORMER_PARAMS_HEAD
    323 		BEAMFORMER_UI_PARAMS
    324 		BEAMFORMER_PARAMS_TAIL
    325 		"};\n\n"
    326 	);
    327 	#undef X
    328 
    329 	ComputeShaderCtx *cs = &ctx->csctx;
    330 	#define X(e, sn, f, nh, pretty_name) do if (s8(f).len > 0) {          \
    331 		ShaderReloadContext *src = push_struct(memory, typeof(*src)); \
    332 		src->beamformer_context  = ctx;                               \
    333 		if (nh) src->header = compute_parameters_header;              \
    334 		src->path    = s8(static_path_join("shaders", f ".glsl"));    \
    335 		src->name    = src->path;                                     \
    336 		src->shader  = cs->programs + BeamformerShaderKind_##e;       \
    337 		src->gl_type = GL_COMPUTE_SHADER;                             \
    338 		src->kind    = BeamformerShaderKind_##e;                      \
    339 		src->link    = src;                                           \
    340 		os_add_file_watch(&ctx->os, memory, src->path, reload_shader_indirect, (iptr)src); \
    341 		reload_shader_indirect(&ctx->os, src->path, (iptr)src, *memory); \
    342 	} while (0);
    343 	COMPUTE_SHADERS
    344 	#undef X
    345 	os_wake_waiters(&worker->sync_variable);
    346 
    347 	FrameViewRenderContext *fvr = &ctx->frame_view_render_context;
    348 	glCreateFramebuffers(1, &fvr->framebuffer);
    349 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffer, s8("Frame View Render Framebuffer"));
    350 	f32 vertices[] = {
    351 		-1,  1, 0, 0,
    352 		-1, -1, 0, 1,
    353 		 1, -1, 1, 1,
    354 		-1,  1, 0, 0,
    355 		 1, -1, 1, 1,
    356 		 1,  1, 1, 0,
    357 	};
    358 	glCreateVertexArrays(1, &fvr->vao);
    359 	glCreateBuffers(1, &fvr->vbo);
    360 
    361 	glNamedBufferData(fvr->vbo, sizeof(vertices), vertices, GL_STATIC_DRAW);
    362 
    363 	glEnableVertexArrayAttrib(fvr->vao, 0);
    364 	glEnableVertexArrayAttrib(fvr->vao, 1);
    365 	glVertexArrayVertexBuffer(fvr->vao, 0, fvr->vbo, 0,               4 * sizeof(f32));
    366 	glVertexArrayVertexBuffer(fvr->vao, 1, fvr->vbo, 2 * sizeof(f32), 4 * sizeof(f32));
    367 	glVertexArrayAttribFormat(fvr->vao, 0, 2, GL_FLOAT, 0, 0);
    368 	glVertexArrayAttribFormat(fvr->vao, 1, 2, GL_FLOAT, 0, 2 * sizeof(f32));
    369 	glVertexArrayAttribBinding(fvr->vao, 0, 0);
    370 	glVertexArrayAttribBinding(fvr->vao, 1, 0);
    371 
    372 	ShaderReloadContext *render_2d = push_struct(memory, typeof(*render_2d));
    373 	render_2d->beamformer_context = ctx;
    374 	render_2d->path    = s8(static_path_join("shaders", "render_2d.frag.glsl"));
    375 	render_2d->name    = s8("shaders/render_2d.glsl");
    376 	render_2d->gl_type = GL_FRAGMENT_SHADER;
    377 	render_2d->kind    = BeamformerShaderKind_Render2D;
    378 	render_2d->shader  = &fvr->shader;
    379 	render_2d->header  = s8(""
    380 	"layout(location = 0) in  vec2 texture_coordinate;\n"
    381 	"layout(location = 0) out vec4 v_out_colour;\n\n"
    382 	"layout(location = " str(FRAME_VIEW_RENDER_DYNAMIC_RANGE_LOC) ") uniform float u_db_cutoff = 60;\n"
    383 	"layout(location = " str(FRAME_VIEW_RENDER_THRESHOLD_LOC)     ") uniform float u_threshold = 40;\n"
    384 	"layout(location = " str(FRAME_VIEW_RENDER_GAMMA_LOC)         ") uniform float u_gamma     = 1;\n"
    385 	"layout(location = " str(FRAME_VIEW_RENDER_LOG_SCALE_LOC)     ") uniform bool  u_log_scale;\n"
    386 	"\n#line 1\n");
    387 	render_2d->link = push_struct(memory, typeof(*render_2d));
    388 	render_2d->link->gl_type = GL_VERTEX_SHADER;
    389 	render_2d->link->link    = render_2d;
    390 	render_2d->link->header  = s8(""
    391 	"layout(location = 0) in vec2 v_position;\n"
    392 	"layout(location = 1) in vec2 v_texture_coordinate;\n"
    393 	"\n"
    394 	"layout(location = 0) out vec2 f_texture_coordinate;\n"
    395 	"\n"
    396 	"void main()\n"
    397 	"{\n"
    398 	"\tf_texture_coordinate = v_texture_coordinate;\n"
    399 	"\tgl_Position = vec4(v_position, 0, 1);\n"
    400 	"}\n");
    401 	reload_shader(&ctx->os, render_2d->path, (iptr)render_2d, *memory);
    402 	os_add_file_watch(&ctx->os, memory, render_2d->path, reload_shader, (iptr)render_2d);
    403 }
    404 
    405 function void
    406 beamformer_invalidate_shared_memory(BeamformerCtx *ctx)
    407 {
    408 	/* NOTE(rnp): work around pebkac when the beamformer is closed while we are doing live
    409 	 * imaging. if the verasonics is blocked in an external function (calling the library
    410 	 * to start compute) it is impossible for us to get it to properly shut down which
    411 	 * will sometimes result in us needing to power cycle the system. set the shared memory
    412 	 * into an error state and release dispatch lock so that future calls will error instead
    413 	 * of blocking.
    414 	 */
    415 	BeamformerSharedMemory *sm = ctx->shared_memory.region;
    416 	BeamformerSharedMemoryLockKind lock = BeamformerSharedMemoryLockKind_DispatchCompute;
    417 	atomic_store_u32(&sm->invalid, 1);
    418 	atomic_store_u32(&sm->external_work_queue.ridx, sm->external_work_queue.widx);
    419 	DEBUG_DECL(if (sm->locks[lock])) {
    420 		os_shared_memory_region_unlock(&ctx->shared_memory, sm->locks, lock);
    421 	}
    422 }