ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 4f6d77f9c7a5e9935bcc70bf2012cb66b252bb04
Parent: f7518ff88f916cfd61c95b307910fae2fb16a9ca
Author: Randy Palamar
Date:   Tue, 23 Sep 2025 12:11:23 -0600

core: JIT compile compute shaders

This way we don't need to load a bunch of shaders upfront that we
won't use. More importantly, though not covered by this commit,
this enables basically any parameter to be baked into the shader
without exponentially growing the number of derived shader variants.

Diffstat:
Mbeamformer.c | 305+++++++++++++++++++++++++++++++++++++++----------------------------------------
Mbeamformer.h | 6++++--
Mbeamformer_shared_memory.c | 7++-----
Mbuild.c | 58+++++++++++++++++++++-------------------------------------
Mgenerated/beamformer.meta.c | 32+++++++++++++++++++-------------
Mstatic.c | 41++++++-----------------------------------
Mui.c | 3+--
7 files changed, 205 insertions(+), 247 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -1,5 +1,13 @@ /* See LICENSE for license details. */ /* TODO(rnp): + * [ ]: do JIT compilation of shaders + * - a larger subset of parameters can be made into compile time constants + * - preallocated storage for shaders is minimized + * - loops over TX and RX count can be unrolled + * - hot reload can still be trivially supported: + * - loop over shaders for the current pipeline + * - check if the base shader matches the shader we are trying to reload + * - load header and append constants which are stored in the pipeline parameters * [ ]: measure performance of doing channel mapping in a separate shader * [ ]: BeamformWorkQueue -> BeamformerWorkQueue * [ ]: need to keep track of gpu memory in some way @@ -316,7 +324,7 @@ fill_frame_compute_work(BeamformerCtx *ctx, BeamformWork *work, BeamformerViewPl } function void -do_sum_shader(BeamformerComputeContext *cc, u32 *in_textures, u32 in_texture_count, f32 in_scale, +do_sum_shader(BeamformerComputeContext *cc, u32 *in_textures, u32 in_texture_count, u32 out_texture, iv3 out_data_dim) { /* NOTE: zero output before summing */ @@ -324,7 +332,6 @@ do_sum_shader(BeamformerComputeContext *cc, u32 *in_textures, u32 in_texture_cou glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT); glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F); - glProgramUniform1f(cc->programs[BeamformerShaderKind_Sum], SUM_PRESCALE_UNIFORM_LOC, in_scale); for (u32 i = 0; i < in_texture_count; i++) { glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F); glDispatchCompute(ORONE((u32)out_data_dim.x / 32u), @@ -557,9 +564,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) if (commit) { u32 index = cp->pipeline.shader_count++; - cp->pipeline.shaders[index] = shader; - cp->pipeline.program_indices[index] = (u32)match; - cp->pipeline.parameters[index] = *sp; + cp->pipeline.shaders[index] = shader; + cp->pipeline.parameters[index] = *sp; + 
cp->shader_matches[index] = (u32)match; } } cp->pipeline.data_kind = data_kind; @@ -664,6 +671,124 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) } function void +stream_push_shader_header(Stream *s, BeamformerShaderKind shader_kind, s8 header) +{ + stream_append_s8s(s, s8("#version 460 core\n\n"), header); + + switch (shader_kind) { + case BeamformerShaderKind_Filter:{ + stream_append_s8(s, s8("" + "layout(local_size_x = " str(FILTER_LOCAL_SIZE_X) ", " + "local_size_y = " str(FILTER_LOCAL_SIZE_Y) ", " + "local_size_z = " str(FILTER_LOCAL_SIZE_Z) ") in;\n\n" + )); + }break; + case BeamformerShaderKind_DAS:{ + stream_append_s8(s, s8("" + "layout(local_size_x = " str(DAS_LOCAL_SIZE_X) ", " + "local_size_y = " str(DAS_LOCAL_SIZE_Y) ", " + "local_size_z = " str(DAS_LOCAL_SIZE_Z) ") in;\n\n" + "layout(location = " str(DAS_VOXEL_OFFSET_UNIFORM_LOC) ") uniform ivec3 u_voxel_offset;\n" + "layout(location = " str(DAS_CYCLE_T_UNIFORM_LOC) ") uniform uint u_cycle_t;\n" + "layout(location = " str(DAS_FAST_CHANNEL_UNIFORM_LOC) ") uniform int u_channel;\n\n" + )); + + #define X(k, id, ...) 
"#define ShaderKind_" #k " " #id "\n" + stream_append_s8s(s, s8(DAS_SHADER_KIND_LIST), s8("\n")); + #undef X + }break; + case BeamformerShaderKind_Decode:{ + stream_append_s8s(s, s8("" + "layout(local_size_x = " str(DECODE_LOCAL_SIZE_X) ", " + "local_size_y = " str(DECODE_LOCAL_SIZE_Y) ", " + "local_size_z = " str(DECODE_LOCAL_SIZE_Z) ") in;\n\n" + "layout(location = " str(DECODE_FIRST_PASS_UNIFORM_LOC) ") uniform bool u_first_pass;\n\n" + )); + }break; + case BeamformerShaderKind_MinMax:{ + stream_append_s8(s, s8("layout(location = " str(MIN_MAX_MIPS_LEVEL_UNIFORM_LOC) + ") uniform int u_mip_map;\n\n")); + }break; + case BeamformerShaderKind_Sum:{ + stream_append_s8(s, s8("layout(location = " str(SUM_PRESCALE_UNIFORM_LOC) + ") uniform float u_sum_prescale = 1.0;\n\n")); + }break; + default:{}break; + } +} + +function void +load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_slot, Arena arena) +{ + read_only local_persist s8 compute_headers[BeamformerShaderKind_ComputeCount] = { + /* X(name, type, gltype) */ + #define X(name, t, gltype) "\t" #gltype " " #name ";\n" + [BeamformerShaderKind_DAS] = s8_comp("layout(std140, binding = 0) uniform parameters {\n" + BEAMFORMER_DAS_UBO_PARAM_LIST + "};\n\n" + ), + [BeamformerShaderKind_Decode] = s8_comp("layout(std140, binding = 0) uniform parameters {\n" + BEAMFORMER_DECODE_UBO_PARAM_LIST + "};\n\n" + ), + [BeamformerShaderKind_Filter] = s8_comp("layout(std140, binding = 0) uniform parameters {\n" + BEAMFORMER_FILTER_UBO_PARAM_LIST + "};\n\n" + ), + #undef X + }; + + BeamformerShaderKind shader = cp->pipeline.shaders[shader_slot]; + BeamformerShaderDescriptor *sd = beamformer_shader_descriptors + shader; + + + u32 program = 0; + i32 reloadable_index = beamformer_shader_reloadable_index_by_shader[shader]; + if (reloadable_index != -1) { + BeamformerShaderKind base_shader = beamformer_reloadable_shader_kinds[reloadable_index]; + s8 path = push_s8_from_parts(&arena, ctx->os.path_separator, 
s8("shaders"), + beamformer_reloadable_shader_files[reloadable_index]); + + Stream shader_stream = arena_stream(arena); + stream_push_shader_header(&shader_stream, base_shader, compute_headers[base_shader]); + + stream_append_s8(&shader_stream, beamformer_shader_local_header_strings[reloadable_index]); + + i32 *header_indices = beamformer_shader_header_vectors[sd - beamformer_shader_descriptors]; + for (i32 index = 0; index < sd->header_vector_length; index++) + stream_append_s8s(&shader_stream, beamformer_shader_global_header_strings[header_indices[index]], s8("\n")); + + i32 *match_vector = beamformer_shader_match_vectors[cp->shader_matches[shader_slot]]; + for (i32 index = 0; index < sd->match_vector_length; index++) { + stream_append_s8s(&shader_stream, s8("#define "), beamformer_shader_descriptor_header_strings[header_indices[index]], s8(" (")); + stream_append_i64(&shader_stream, match_vector[index]); + stream_append_s8(&shader_stream, s8(")\n")); + } + + if (sd->has_local_flags) { + stream_append_s8(&shader_stream, s8("#define ShaderFlags (0x")); + stream_append_hex_u64(&shader_stream, (u64)match_vector[sd->match_vector_length]); + stream_append_s8(&shader_stream, s8(")\n")); + } + + stream_append_s8(&shader_stream, s8("\n#line 1\n")); + + s8 shader_text = arena_stream_commit(&arena, &shader_stream); + s8 file_text = os_read_whole_file(&arena, (c8 *)path.data); + + assert(shader_text.data + shader_text.len == file_text.data); + shader_text.len += file_text.len; + + /* TODO(rnp): instance name */ + s8 shader_name = beamformer_shader_names[shader]; + program = load_shader(&ctx->os, arena, &shader_text, (u32 []){GL_COMPUTE_SHADER}, 1, shader_name); + } + + glDeleteProgram(cp->programs[shader_slot]); + cp->programs[shader_slot] = program; +} + +function void beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 block, Arena arena) { BeamformerParameterBlock *pb = beamformer_parameter_block_lock(&ctx->shared_memory, block, -1); @@ 
-683,6 +808,9 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, 1 << BeamformerParameterBlockRegion_Parameters; pb->dirty_regions &= ~mask; + for (u32 shader_slot = 0; shader_slot < cp->pipeline.shader_count; shader_slot++) + load_compute_shader(ctx, cp, shader_slot, arena); + #define X(k, t, v) glNamedBufferSubData(cp->ubos[BeamformerComputeUBOKind_##k], \ 0, sizeof(t), &cp->v ## _ubo_data); BEAMFORMER_COMPUTE_UBO_LIST @@ -740,14 +868,14 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, function void do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame *frame, - BeamformerShaderKind shader, u32 program_index, BeamformerShaderParameters *sp, Arena arena) + BeamformerShaderKind shader, u32 shader_slot, BeamformerShaderParameters *sp, Arena arena) { BeamformerComputeContext *cc = &ctx->compute_context; - i32 *match_vector = beamformer_shader_match_vectors[program_index]; + i32 *match_vector = beamformer_shader_match_vectors[cp->shader_matches[shader_slot]]; BeamformerShaderDescriptor *shader_descriptor = beamformer_shader_descriptors + shader; - u32 program = cc->programs[program_index]; + u32 program = cp->programs[shader_slot]; glUseProgram(program); u32 output_ssbo_idx = !cc->last_output_ssbo_index; @@ -814,7 +942,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame for (i32 i = 1; i < frame->mips; i++) { glBindImageTexture(0, frame->texture, i - 1, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F); glBindImageTexture(1, frame->texture, i - 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F); - glProgramUniform1i(cc->programs[shader], MIN_MAX_MIPS_LEVEL_UNIFORM_LOC, i); + glProgramUniform1i(program, MIN_MAX_MIPS_LEVEL_UNIFORM_LOC, i); u32 width = (u32)frame->dim.x >> i; u32 height = (u32)frame->dim.y >> i; @@ -923,7 +1051,8 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame assert(to_average == frame_count); - do_sum_shader(cc, 
in_textures, frame_count, 1 / (f32)frame_count, aframe->texture, aframe->dim); + glProgramUniform1f(program, SUM_PRESCALE_UNIFORM_LOC, 1 / (f32)frame_count); + do_sum_shader(cc, in_textures, frame_count, aframe->texture, aframe->dim); aframe->min_coordinate = frame->min_coordinate; aframe->max_coordinate = frame->max_coordinate; aframe->compound_count = frame->compound_count; @@ -933,60 +1062,11 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame } } -function void -stream_push_shader_header(Stream *s, ShaderReloadContext *ctx) -{ - BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + ctx->reloadable_info_index; - - stream_append_s8s(s, s8("#version 460 core\n\n"), ctx->header); - - switch (rsi->kind) { - case BeamformerShaderKind_Filter:{ - stream_append_s8(s, s8("" - "layout(local_size_x = " str(FILTER_LOCAL_SIZE_X) ", " - "local_size_y = " str(FILTER_LOCAL_SIZE_Y) ", " - "local_size_z = " str(FILTER_LOCAL_SIZE_Z) ") in;\n\n" - )); - }break; - case BeamformerShaderKind_DAS:{ - stream_append_s8(s, s8("" - "layout(local_size_x = " str(DAS_LOCAL_SIZE_X) ", " - "local_size_y = " str(DAS_LOCAL_SIZE_Y) ", " - "local_size_z = " str(DAS_LOCAL_SIZE_Z) ") in;\n\n" - "layout(location = " str(DAS_VOXEL_OFFSET_UNIFORM_LOC) ") uniform ivec3 u_voxel_offset;\n" - "layout(location = " str(DAS_CYCLE_T_UNIFORM_LOC) ") uniform uint u_cycle_t;\n" - "layout(location = " str(DAS_FAST_CHANNEL_UNIFORM_LOC) ") uniform int u_channel;\n\n" - )); - - #define X(k, id, ...) 
"#define ShaderKind_" #k " " #id "\n" - stream_append_s8s(s, s8(DAS_SHADER_KIND_LIST), s8("\n")); - #undef X - }break; - case BeamformerShaderKind_Decode:{ - stream_append_s8s(s, s8("" - "layout(local_size_x = " str(DECODE_LOCAL_SIZE_X) ", " - "local_size_y = " str(DECODE_LOCAL_SIZE_Y) ", " - "local_size_z = " str(DECODE_LOCAL_SIZE_Z) ") in;\n\n" - "layout(location = " str(DECODE_FIRST_PASS_UNIFORM_LOC) ") uniform bool u_first_pass;\n\n" - )); - }break; - case BeamformerShaderKind_MinMax:{ - stream_append_s8(s, s8("layout(location = " str(MIN_MAX_MIPS_LEVEL_UNIFORM_LOC) - ") uniform int u_mip_map;\n\n")); - }break; - case BeamformerShaderKind_Sum:{ - stream_append_s8(s, s8("layout(location = " str(SUM_PRESCALE_UNIFORM_LOC) - ") uniform float u_sum_prescale = 1.0;\n\n")); - }break; - default:{}break; - } -} - function s8 -shader_text_with_header(ShaderReloadContext *ctx, s8 filepath, Arena *arena) +shader_text_with_header(s8 header, s8 filepath, BeamformerShaderKind shader_kind, Arena *arena) { Stream sb = arena_stream(*arena); - stream_push_shader_header(&sb, ctx); + stream_push_shader_header(&sb, shader_kind, header); stream_append_s8(&sb, s8("\n#line 1\n")); s8 result = arena_stream_commit(arena, &sb); @@ -1000,10 +1080,12 @@ shader_text_with_header(ShaderReloadContext *ctx, s8 filepath, Arena *arena) } /* NOTE(rnp): currently this function is only handling rendering shaders. 
- * look at reload_compute_shader for compute shaders */ + * look at load_compute_shader for compute shaders */ DEBUG_EXPORT BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader) { - BeamformerCtx *ctx = src->beamformer_context; + BeamformerCtx *ctx = src->beamformer_context; + BeamformerShaderKind kind = beamformer_reloadable_shader_kinds[src->reloadable_info_index]; + assert(kind == BeamformerShaderKind_Render3D); i32 shader_count = 1; ShaderReloadContext *link = src->link; @@ -1016,15 +1098,12 @@ DEBUG_EXPORT BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader) do { s8 filepath = {0}; if (link->reloadable_info_index >= 0) filepath = path; - shader_texts[index] = shader_text_with_header(link, filepath, &arena); + shader_texts[index] = shader_text_with_header(link->header, filepath, kind, &arena); shader_types[index] = link->gl_type; index++; link = link->link; } while (link != src); - BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + src->reloadable_info_index; - assert(rsi->kind == BeamformerShaderKind_Render3D); - u32 *shader = &ctx->frame_view_render_context.shader; glDeleteProgram(*shader); *shader = load_shader(&ctx->os, arena, shader_texts, shader_types, shader_count, shader_name); @@ -1034,87 +1113,6 @@ DEBUG_EXPORT BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader) } function void -reload_compute_shader(BeamformerCtx *ctx, ShaderReloadContext *src, Arena arena) -{ - BeamformerComputeContext *cc = &ctx->compute_context; - BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + src->reloadable_info_index; - BeamformerShaderDescriptor *sd = beamformer_shader_descriptors + rsi->kind; - - Stream status = stream_alloc(&arena, 128); - u32 completed = 0; - u32 total_shaders = (u32)(sd->one_past_last_match_vector_index - sd->first_match_vector_index); - for (i32 i = 0; i < rsi->sub_shader_descriptor_index_count; i++) { - BeamformerShaderDescriptor *ssd = beamformer_shader_descriptors + 
rsi->sub_shader_descriptor_indices[i]; - total_shaders += (u32)(ssd->one_past_last_match_vector_index - ssd->first_match_vector_index); - } - - s8 path = push_s8_from_parts(&arena, ctx->os.path_separator, s8("shaders"), - beamformer_reloadable_shader_files[src->reloadable_info_index]); - s8 file_text = os_read_whole_file(&arena, (c8 *)path.data); - Stream shader = arena_stream(arena); - - stream_push_shader_header(&shader, src); - - stream_append_s8(&shader, beamformer_shader_local_header_strings[src->reloadable_info_index]); - - i32 save_point = shader.widx; - for (i32 sub_index = -1; sub_index < rsi->sub_shader_descriptor_index_count; sub_index++) { - shader.widx = save_point; - - if (sub_index != -1) - sd = beamformer_shader_descriptors + rsi->sub_shader_descriptor_indices[sub_index]; - - i32 *hvector = beamformer_shader_header_vectors[sd - beamformer_shader_descriptors]; - for (i32 index = 0; index < sd->header_vector_length; index++) - stream_append_s8s(&shader, beamformer_shader_global_header_strings[hvector[index]], s8("\n")); - - i32 instance_save_point = shader.widx; - arena_commit(&arena, instance_save_point); - TempArena arena_save = begin_temp_arena(&arena); - - for (i32 instance = sd->first_match_vector_index; - instance < sd->one_past_last_match_vector_index; - instance++) - { - shader.widx = instance_save_point; - end_temp_arena(arena_save); - - i32 *match_vector = beamformer_shader_match_vectors[instance]; - for (i32 index = 0; index < sd->match_vector_length; index++) { - stream_append_s8s(&shader, s8("#define "), beamformer_shader_descriptor_header_strings[hvector[index]], s8(" (")); - stream_append_i64(&shader, match_vector[index]); - stream_append_s8(&shader, s8(")\n")); - } - - if (sd->has_local_flags) { - stream_append_s8(&shader, s8("#define ShaderFlags (0x")); - stream_append_hex_u64(&shader, (u64)match_vector[sd->match_vector_length]); - stream_append_s8(&shader, s8(")\n")); - } - - stream_append_s8s(&shader, s8("\n#line 1\n"), file_text); 
- - arena_commit(&arena, shader.widx - instance_save_point); - - s8 shader_text = stream_to_s8(&shader); - /* TODO(rnp): instance name */ - s8 shader_name = beamformer_shader_names[rsi->kind]; - glDeleteProgram(cc->programs[instance]); - cc->programs[instance] = load_shader(&ctx->os, arena, &shader_text, &src->gl_type, 1, shader_name); - - status.widx = 0; - stream_append_s8s(&status, s8("\r\x1b[2Kloaded shader "), shader_name, s8(": [")); - stream_append_u64(&status, ++completed); - stream_append_s8s(&status, s8("/")); - stream_append_u64(&status, total_shaders); - stream_append_s8s(&status, s8("]")); - os_write_file(ctx->os.error_handle, stream_to_s8(&status)); - } - } - os_write_file(ctx->os.error_handle, s8("\n")); -} - -function void complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_context) { BeamformerComputeContext *cs = &ctx->compute_context; @@ -1125,7 +1123,10 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c b32 can_commit = 1; switch (work->kind) { case BeamformerWorkKind_ReloadShader:{ - reload_compute_shader(ctx, work->shader_reload_context, *arena); + u32 reserved_blocks = sm->reserved_parameter_blocks; + for (u32 i = 0; i < reserved_blocks; i++) + mark_parameter_block_region_dirty(sm, i, BeamformerParameterBlockRegion_ComputePipeline); + if (ctx->latest_frame && !sm->live_imaging_parameters.active) { fill_frame_compute_work(ctx, work, ctx->latest_frame->view_plane_tag, 0, 0); can_commit = 0; @@ -1230,8 +1231,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, rf->ssbo, slot * rf->active_rf_size, rf->active_rf_size); glBeginQuery(GL_TIME_ELAPSED, cc->shader_timer_ids[0]); - do_compute_shader(ctx, cp, frame, pipeline->shaders[0], pipeline->program_indices[0], - pipeline->parameters + 0, *arena); + do_compute_shader(ctx, cp, frame, pipeline->shaders[0], 0, pipeline->parameters + 0, *arena); 
glEndQuery(GL_TIME_ELAPSED); if (work->kind == BeamformerWorkKind_ComputeIndirect) { @@ -1245,8 +1245,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c for (u32 i = 1; i < pipeline->shader_count; i++) { did_sum_shader |= pipeline->shaders[i] == BeamformerShaderKind_Sum; glBeginQuery(GL_TIME_ELAPSED, cc->shader_timer_ids[i]); - do_compute_shader(ctx, cp, frame, pipeline->shaders[i], pipeline->program_indices[i], - pipeline->parameters + i, *arena); + do_compute_shader(ctx, cp, frame, pipeline->shaders[i], i, pipeline->parameters + i, *arena); glEndQuery(GL_TIME_ELAPSED); } diff --git a/beamformer.h b/beamformer.h @@ -204,6 +204,9 @@ typedef struct BeamformerComputePlan BeamformerComputePlan; struct BeamformerComputePlan { BeamformerComputePipeline pipeline; + u32 programs[BeamformerMaxComputeShaderStages]; + u32 shader_matches[BeamformerMaxComputeShaderStages]; + uv3 decode_dispatch; uv3 demod_dispatch; @@ -243,8 +246,6 @@ typedef struct { } BeamformerRFBuffer; typedef struct { - /* TODO(rnp): slightly oversized; remove non compute shaders from match vectors count */ - u32 programs[beamformer_match_vectors_count]; BeamformerRFBuffer rf_buffer; BeamformerComputePlan *compute_plans[BeamformerMaxParameterBlockSlots]; @@ -384,6 +385,7 @@ typedef struct { BeamformerFrame averaged_frames[2]; } BeamformerCtx; +typedef struct ShaderReloadContext ShaderReloadContext; struct ShaderReloadContext { BeamformerCtx *beamformer_context; ShaderReloadContext *link; diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c @@ -1,8 +1,7 @@ /* See LICENSE for license details. 
*/ -#define BEAMFORMER_SHARED_MEMORY_VERSION (15UL) +#define BEAMFORMER_SHARED_MEMORY_VERSION (16UL) -typedef struct BeamformerFrame BeamformerFrame; -typedef struct ShaderReloadContext ShaderReloadContext; +typedef struct BeamformerFrame BeamformerFrame; typedef enum { BeamformerWorkKind_Compute, @@ -74,7 +73,6 @@ typedef struct { BeamformerComputeIndirectWorkContext compute_indirect_context; BeamformerCreateFilterContext create_filter_context; BeamformerExportContext export_context; - ShaderReloadContext *shader_reload_context; }; } BeamformWork; @@ -117,7 +115,6 @@ typedef union { typedef struct { BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages]; BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages]; - u32 program_indices[BeamformerMaxComputeShaderStages]; u32 shader_count; BeamformerDataKind data_kind; } BeamformerComputePipeline; diff --git a/build.c b/build.c @@ -779,6 +779,7 @@ meta_push_(MetaprogramContext *m, s8 *items, iz count) #define meta_begin_scope(m, ...) do { meta_push_line(m, __VA_ARGS__); (m)->indentation_level++; } while(0) #define meta_end_scope(m, ...) do { (m)->indentation_level--; meta_push_line(m, __VA_ARGS__); } while(0) #define meta_push_u64(m, n) stream_append_u64(&(m)->stream, (n)) +#define meta_push_i64(m, n) stream_append_i64(&(m)->stream, (n)) #define meta_push_u64_hex(m, n) stream_append_hex_u64(&(m)->stream, (n)) #define meta_begin_matlab_class_cracker(_1, _2, FN, ...) FN @@ -935,9 +936,9 @@ global jmp_buf compiler_jmp_buf; } while(0) #define meta_entry_error_location(e, loc, ...) 
do { \ - meta_compiler_error_message((loc), __VA_ARGS__); \ - meta_entry_print((e), 1, (i32)(loc).column); \ - meta_error(); \ + meta_compiler_error_message((loc), __VA_ARGS__); \ + meta_entry_print((e), 1, (i32)(loc).column); \ + meta_error(); \ } while (0) function no_return void @@ -1832,14 +1833,7 @@ meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx) { /////////////////////////////// // NOTE(rnp): reloadable infos - i32 max_shader_name_length = 0; - for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { - if (ctx->base_shaders.data[shader].file.len == 0) continue; - s8 name = ctx->shader_names.data[ctx->base_shaders.data[shader].shader->base_name_id]; - max_shader_name_length = MAX((i32)name.len, max_shader_name_length); - } - - meta_begin_scope(m, s8("read_only global BeamformerReloadableShaderInfo beamformer_reloadable_shader_infos[] = {")); + meta_begin_scope(m, s8("read_only global BeamformerShaderKind beamformer_reloadable_shader_kinds[] = {")); for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { MetaBaseShader *bs = ctx->base_shaders.data + shader; MetaShader *s = bs->shader; @@ -1847,21 +1841,7 @@ meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx) if (bs->file.len == 0) continue; s8 name = ctx->shader_names.data[s->base_name_id]; - meta_begin_line(m, s8("{BeamformerShaderKind_"), name, s8(", ")); - meta_pad(m, ' ', max_shader_name_length - (i32)name.len); - meta_push_u64(m, (u64)bs->sub_shaders.count); - - if (bs->sub_shaders.count) { - meta_push(m, s8(", (i32 []){")); - for (iz sub_shader = 0; sub_shader < bs->sub_shaders.count; sub_shader++) { - if (sub_shader != 0) meta_push(m, s8(", ")); - meta_push_u64(m, bs->sub_shaders.data[sub_shader]); - } - meta_push(m, s8("}")); - } else { - meta_push(m, s8(", 0")); - } - meta_end_line(m, s8("},")); + meta_push_line(m, s8("BeamformerShaderKind_"), name, s8(",")); } meta_end_scope(m, s8("};\n")); @@ -1874,6 +1854,21 @@ 
meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx) meta_end_scope(m, s8("};\n")); { + i32 rolling_index = 0; + meta_begin_scope(m, s8("read_only global i32 beamformer_shader_reloadable_index_by_shader[] = {")); + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + MetaBaseShader *bs = ctx->base_shaders.data + shader; + i32 index = bs->file.len == 0 ? -1 : rolling_index++; + for (i32 sub_shader = -1; sub_shader < bs->sub_shaders.count; sub_shader++) { + meta_indent(m); + meta_push_i64(m, index); + meta_end_line(m, s8(",")); + } + } + meta_end_scope(m, s8("};\n")); + } + + { u32 info_index = 0; for (iz group = 0; group < ctx->shader_groups.count; group++) { MetaShaderGroup *sg = ctx->shader_groups.data + group; @@ -2070,17 +2065,6 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) metagen_push_c_struct(m, name, types, countof(types), names, countof(names)); } - { - s8 name = s8_comp("BeamformerReloadableShaderInfo"); - s8 types[] = {s8_comp("BeamformerShaderKind"), s8_comp("i32"), s8_comp("i32 *")}; - s8 names[] = { - s8_comp("kind"), - s8_comp("sub_shader_descriptor_index_count"), - s8_comp("sub_shader_descriptor_indices"), - }; - metagen_push_c_struct(m, name, types, countof(types), names, countof(names)); - } - /////////////////////////////////////// // NOTE(rnp): shader descriptor tables i32 match_vectors_count = 0; diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -73,12 +73,6 @@ typedef struct { b32 has_local_flags; } BeamformerShaderDescriptor; -typedef struct { - BeamformerShaderKind kind; - i32 sub_shader_descriptor_index_count; - i32 * sub_shader_descriptor_indices; -} BeamformerReloadableShaderInfo; - read_only global i32 *beamformer_shader_match_vectors[] = { // CudaDecode 0, @@ -178,13 +172,13 @@ read_only global s8 beamformer_shader_names[] = { s8_comp("Render3D"), }; -read_only global BeamformerReloadableShaderInfo beamformer_reloadable_shader_infos[] = { - {BeamformerShaderKind_Decode, 0, 
0}, - {BeamformerShaderKind_Filter, 1, (i32 []){4}}, - {BeamformerShaderKind_DAS, 0, 0}, - {BeamformerShaderKind_MinMax, 0, 0}, - {BeamformerShaderKind_Sum, 0, 0}, - {BeamformerShaderKind_Render3D, 0, 0}, +read_only global BeamformerShaderKind beamformer_reloadable_shader_kinds[] = { + BeamformerShaderKind_Decode, + BeamformerShaderKind_Filter, + BeamformerShaderKind_DAS, + BeamformerShaderKind_MinMax, + BeamformerShaderKind_Sum, + BeamformerShaderKind_Render3D, }; read_only global s8 beamformer_reloadable_shader_files[] = { @@ -196,6 +190,18 @@ read_only global s8 beamformer_reloadable_shader_files[] = { s8_comp("render_3d.frag.glsl"), }; +read_only global i32 beamformer_shader_reloadable_index_by_shader[] = { + -1, + -1, + 0, + 1, + 1, + 2, + 3, + 4, + 5, +}; + read_only global i32 beamformer_reloadable_compute_shader_info_indices[] = { 0, 1, diff --git a/static.c b/static.c @@ -170,19 +170,17 @@ dump_gl_params(GLParams *gl, Arena a, OS *os) function FILE_WATCH_CALLBACK_FN(reload_shader) { - ShaderReloadContext *ctx = (typeof(ctx))user_data; - BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + ctx->reloadable_info_index; - return beamformer_reload_shader(os, path, ctx, arena, beamformer_shader_names[rsi->kind]); + ShaderReloadContext *ctx = (typeof(ctx))user_data; + BeamformerShaderKind kind = beamformer_reloadable_shader_kinds[ctx->reloadable_info_index]; + return beamformer_reload_shader(os, path, ctx, arena, beamformer_shader_names[kind]); } function FILE_WATCH_CALLBACK_FN(reload_shader_indirect) { - ShaderReloadContext *src = (typeof(src))user_data; - BeamformerCtx *ctx = src->beamformer_context; + BeamformerCtx *ctx = (BeamformerCtx *)user_data; BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); if (work) { work->kind = BeamformerWorkKind_ReloadShader, - work->shader_reload_context = src; beamform_work_queue_push_commit(ctx->beamform_work_queue); os_wake_waiters(&os->compute_worker.sync_variable); } @@ -415,40 
+413,13 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input glEnable(GL_DEBUG_OUTPUT); #endif - read_only local_persist s8 compute_headers[BeamformerShaderKind_ComputeCount] = { - /* X(name, type, gltype) */ - #define X(name, t, gltype) "\t" #gltype " " #name ";\n" - [BeamformerShaderKind_DAS] = s8_comp("layout(std140, binding = 0) uniform parameters {\n" - BEAMFORMER_DAS_UBO_PARAM_LIST - "};\n\n" - ), - [BeamformerShaderKind_Decode] = s8_comp("layout(std140, binding = 0) uniform parameters {\n" - BEAMFORMER_DECODE_UBO_PARAM_LIST - "};\n\n" - ), - [BeamformerShaderKind_Filter] = s8_comp("layout(std140, binding = 0) uniform parameters {\n" - BEAMFORMER_FILTER_UBO_PARAM_LIST - "};\n\n" - ), - #undef X - }; - for EachElement(beamformer_reloadable_compute_shader_info_indices, it) { i32 index = beamformer_reloadable_compute_shader_info_indices[it]; Arena temp = scratch; - s8 file = push_s8_from_parts(&temp, s8(OS_PATH_SEPARATOR), s8("shaders"), beamformer_reloadable_shader_files[index]); - - BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + index; - ShaderReloadContext *src = push_struct(memory, typeof(*src)); - src->beamformer_context = ctx; - src->reloadable_info_index = index; - src->link = src; - src->header = compute_headers[rsi->kind]; - src->gl_type = GL_COMPUTE_SHADER; - os_add_file_watch(&ctx->os, memory, file, reload_shader_indirect, (iptr)src); - reload_shader_indirect(&ctx->os, file, (iptr)src, *memory); + os_add_file_watch(&ctx->os, memory, file, reload_shader_indirect, (iptr)ctx); + reload_shader_indirect(&ctx->os, file, (iptr)ctx, *memory); } os_wake_waiters(&worker->sync_variable); diff --git a/ui.c b/ui.c @@ -2792,7 +2792,6 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v result = v2_add(result, table_extent(table, arena, text_spec.font)); u32 row_index = 0; - u32 *programs = ui->beamformer_context->compute_context.programs; TableIterator *it = 
table_iterator_new(table, TIK_ROWS, &arena, 0, r.pos, text_spec.font); for (TableRow *row = table_iterator_next(it, &arena); row; @@ -2805,7 +2804,7 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v cell_rect.size.w = t->widths[column]; text_spec.limits.size.w = r.size.w - (cell_rect.pos.x - it->start_x); - if (column == 0 && row_index < stages && programs[cp->pipeline.shaders[row_index]] == 0 && + if (column == 0 && row_index < stages && cp->programs[row_index] == 0 && cp->pipeline.shaders[row_index] != BeamformerShaderKind_CudaHilbert && cp->pipeline.shaders[row_index] != BeamformerShaderKind_CudaDecode) {