ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 47d14ce3869cf38781e912453cd7c86286c8c68e
Parent: 00832c3949982d9ef4e717cba080629c50f91610
Author: Randy Palamar
Date:   Wed,  3 Sep 2025 13:35:01 -0600

build: add meta program

Currently this handles:
  - generating a list of shader variations with baked parameters
    - baked-in parameters were measured to be faster than modifying behaviour
      through values specified at runtime
  - a set of enums that are required by the shaders
    - some of which are global and used below
  - MATLAB bindings based on global enums
  - modified helper library header based on global enums

No optimization was performed on the "compiler" so it may get slow
as the meta file expands. A fair amount of work was done to ensure
that information that is shared between shader variations is not
duplicated over and over.

The generated C code is committed so that it is clear what parts
of the actual compiled program have changed when something in the
meta file or "compiler" is modified.

Diffstat:
M.gitignore | 4+++-
Mbeamformer.c | 423+++++++++++++++++++++++++++++++++++++------------------------------------------
Mbeamformer.h | 41++++++++++++++++-------------------------
Abeamformer.meta | 45+++++++++++++++++++++++++++++++++++++++++++++
Mbeamformer_parameters.h | 64+---------------------------------------------------------------
Mbeamformer_shared_memory.c | 1+
Mbuild.c | 1777++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Agenerated/beamformer.meta.c | 278+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mhelpers/ogl_beamformer_lib.c | 3++-
Mintrinsics.c | 19+++++++++++++++++++
Mos_linux.c | 8++++----
Mos_win32.c | 8++++----
Mshaders/das.glsl | 32+++++++++++++++-----------------
Mshaders/decode.glsl | 6+++---
Mshaders/filter.glsl | 20+++++++++++---------
Mstatic.c | 63+++++++++++++++++++++++++++++++++++++--------------------------
Mui.c | 15++++-----------
Mutil.c | 50++++++++++++++++++++++++++++++++++++++++++++------
Mutil.h | 10++++++++--
Mutil_gl.c | 9++-------
20 files changed, 2322 insertions(+), 554 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,9 +1,11 @@ * !.github -!tests !external +!generated !helpers !shaders +!tests !*.c !*.glsl !*.h +!*.meta diff --git a/beamformer.c b/beamformer.c @@ -1,9 +1,5 @@ /* See LICENSE for license details. */ /* TODO(rnp): - * [ ]: filter shader specializations need to be generated per sample mode - * - performance was measured with a switch on sampling mode and the perfomance gained - * is 80% worse than just having a baked in sampling mode - * - should also include channel mapping just in case * [ ]: make decode output real values for real inputs and complex values for complex inputs * - this means that das should have a RF version and an IQ version * - this will also flip the current hack to support demodulate after decode to @@ -469,48 +465,61 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) u32 shader = pb->pipeline.shaders[i]; b32 commit = 0; + iz match = 0; switch (shader) { case BeamformerShaderKind_CudaHilbert:{ commit = run_cuda_hilbert; }break; case BeamformerShaderKind_Decode:{ - BeamformerShaderKind decode_table[] = { - [BeamformerDataKind_Int16] = BeamformerShaderKind_Decode, - [BeamformerDataKind_Int16Complex] = BeamformerShaderKind_DecodeInt16Complex, - [BeamformerDataKind_Float32] = BeamformerShaderKind_DecodeFloat, - [BeamformerDataKind_Float32Complex] = BeamformerShaderKind_DecodeFloatComplex, - }; - if (decode_first && demodulate) { - /* TODO(rnp): for now we assume that if we are demodulating the data is int16 */ - shader = BeamformerShaderKind_DecodeInt16ToFloat; - } else if (decode_first) { - shader = decode_table[CLAMP(data_kind, 0, countof(decode_table) - 1)]; - } else { - if (data_kind == BeamformerDataKind_Int16) - shader = BeamformerShaderKind_DecodeInt16Complex; - else - shader = BeamformerShaderKind_DecodeFloatComplex; + /* TODO(rnp): rework decode first and demodulate after */ + BeamformerDataKind decode_data_kind = data_kind; + if (!decode_first) { + if (data_kind == 
BeamformerDataKind_Int16) { + decode_data_kind = BeamformerDataKind_Int16Complex; + } else { + decode_data_kind = BeamformerDataKind_Float32Complex; + } } + match = beamformer_shader_decode_match(decode_data_kind); commit = 1; }break; case BeamformerShaderKind_Demodulate:{ BeamformerFilter *f = cp->filters + sp->filter_slot; - if (decode_first || (!decode_first && data_kind == BeamformerDataKind_Float32)) { - if (f->parameters.complex) shader = BeamformerShaderKind_DemodulateFloatCF; - else shader = BeamformerShaderKind_DemodulateFloat; - } else if (f->parameters.complex) { - shader = BeamformerShaderKind_DemodulateCF; - } + i32 local_flags = BeamformerShaderFilterFlags_Demodulate; + if (f->parameters.complex) local_flags |= BeamformerShaderFilterFlags_ComplexFilter; + if (!decode_first) local_flags |= BeamformerShaderFilterFlags_MapChannels; + + BeamformerDataKind filter_data_kind = data_kind; + if (decode_first) + filter_data_kind = BeamformerDataKind_Float32; + + match = beamformer_shader_demodulate_match(filter_data_kind, pb->parameters.sampling_mode, local_flags); + bp->time_offset += f->time_delay; commit = 1; }break; case BeamformerShaderKind_Filter:{ BeamformerFilter *f = cp->filters + sp->filter_slot; - if (f->parameters.complex) shader = BeamformerShaderKind_FilterCF; + i32 local_flags = 0; + if (f->parameters.complex) local_flags |= BeamformerShaderFilterFlags_ComplexFilter; + + BeamformerDataKind filter_data_kind = data_kind; + if (decode_first) + filter_data_kind = BeamformerDataKind_Float32; + + match = beamformer_shader_filter_match(filter_data_kind, local_flags); bp->time_offset += f->time_delay; commit = 1; }break; case BeamformerShaderKind_DAS:{ + BeamformerDataKind das_data_kind = BeamformerDataKind_Float32; + if (demodulate || run_cuda_hilbert) + das_data_kind = BeamformerDataKind_Float32Complex; + + i32 local_flags = 0; if ((bp->shader_flags & DASShaderFlags_CoherencyWeighting) == 0) - shader = BeamformerShaderKind_DASFast; + local_flags |= 
BeamformerShaderDASFlags_Fast; + if (bp->shader_kind == DASShaderKind_UFORCES || bp->shader_kind == DASShaderKind_UHERCULES) + local_flags |= BeamformerShaderDASFlags_Sparse; + match = beamformer_shader_das_match(das_data_kind, local_flags); commit = 1; }break; default:{ commit = 1; }break; @@ -518,8 +527,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) if (commit) { u32 index = cp->pipeline.shader_count++; - cp->pipeline.shaders[index] = shader; - cp->pipeline.parameters[index] = *sp; + cp->pipeline.shaders[index] = shader; + cp->pipeline.program_indices[index] = (u32)match; + cp->pipeline.parameters[index] = *sp; } } cp->pipeline.data_kind = data_kind; @@ -578,8 +588,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) mp->sampling_frequency = bp->sampling_frequency / 2; mp->decimation_rate = decimation_rate; - if (!decode_first) mp->shader_flags |= FilterShaderFlags_MapChannels; - bp->sampling_frequency /= 2 * (f32)mp->decimation_rate; bp->sample_count /= 2 * mp->decimation_rate; @@ -618,7 +626,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) flt->demodulation_frequency = bp->demodulation_frequency; flt->sampling_frequency = bp->sampling_frequency; flt->decimation_rate = 1; - flt->shader_flags = pb->parameters.sampling_mode & FilterShaderFlags_SamplingModeMask; flt->output_channel_stride = bp->sample_count * bp->acquisition_count; flt->output_sample_stride = 1; flt->output_transmit_stride = bp->sample_count; @@ -713,23 +720,21 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, function void do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame *frame, - BeamformerShaderKind shader, BeamformerShaderParameters *sp, Arena arena) + BeamformerShaderKind shader, u32 program_index, BeamformerShaderParameters *sp, Arena arena) { BeamformerComputeContext *cc = &ctx->compute_context; - u32 program = 
cc->programs[shader]; + i32 *match_vector = beamformer_shader_match_vectors[program_index]; + BeamformerShaderDescriptor *shader_descriptor = beamformer_shader_descriptors + shader; + + u32 program = cc->programs[program_index]; glUseProgram(program); u32 output_ssbo_idx = !cc->last_output_ssbo_index; u32 input_ssbo_idx = cc->last_output_ssbo_index; switch (shader) { - case BeamformerShaderKind_Decode: - case BeamformerShaderKind_DecodeInt16Complex: - case BeamformerShaderKind_DecodeFloat: - case BeamformerShaderKind_DecodeFloatComplex: - case BeamformerShaderKind_DecodeInt16ToFloat: - { + case BeamformerShaderKind_Decode:{ glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[BeamformerComputeUBOKind_Decode]); glBindImageTexture(0, cp->textures[BeamformerComputeTextureKind_Hadamard], 0, 0, 0, GL_READ_ONLY, GL_R8I); @@ -761,26 +766,23 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame cc->last_output_ssbo_index = !cc->last_output_ssbo_index; }break; case BeamformerShaderKind_Filter: - case BeamformerShaderKind_FilterCF: case BeamformerShaderKind_Demodulate: - case BeamformerShaderKind_DemodulateCF: - case BeamformerShaderKind_DemodulateFloat: - case BeamformerShaderKind_DemodulateFloatCF: { - BeamformerFilterUBO *ubo = &cp->demod_ubo_data; - if (shader == BeamformerShaderKind_Filter) - ubo = &cp->filter_ubo_data; + i32 local_flags = match_vector[shader_descriptor->match_vector_length]; + b32 map_channels = (local_flags & BeamformerShaderFilterFlags_MapChannels) != 0; u32 index = shader == BeamformerShaderKind_Filter ? 
BeamformerComputeUBOKind_Filter : BeamformerComputeUBOKind_Demodulate; glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[index]); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[output_ssbo_idx]); - if ((ubo->shader_flags & FilterShaderFlags_MapChannels) == 0) + + if (!map_channels) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx]); GLenum kind = cp->filters[sp->filter_slot].parameters.complex? GL_RG32F : GL_R32F; glBindImageTexture(0, cp->filters[sp->filter_slot].texture, 0, 0, 0, GL_READ_ONLY, kind); - if (ubo->shader_flags & FilterShaderFlags_MapChannels) + + if (map_channels) glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_ChannelMapping], 0, 0, 0, GL_READ_ONLY, GL_R16I); glDispatchCompute(cp->demod_dispatch.x, cp->demod_dispatch.y, cp->demod_dispatch.z); @@ -801,11 +803,14 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); } }break; - case BeamformerShaderKind_DAS: - case BeamformerShaderKind_DASFast: - { + case BeamformerShaderKind_DAS:{ BeamformerDASUBO *ubo = &cp->das_ubo_data; - if (shader == BeamformerShaderKind_DASFast) { + + i32 local_flags = match_vector[shader_descriptor->match_vector_length]; + b32 fast = (local_flags & BeamformerShaderDASFlags_Fast) != 0; + b32 sparse = (local_flags & BeamformerShaderDASFlags_Sparse) != 0; + + if (fast) { glClearTexImage(frame->texture, 0, GL_RED, GL_FLOAT, 0); glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT); glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F); @@ -813,14 +818,17 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F); } + u32 sparse_texture = cp->textures[BeamformerComputeTextureKind_SparseElements]; + if (!sparse) sparse_texture = 0; + glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[BeamformerComputeUBOKind_DAS]); 
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx], 0, cp->rf_size); - glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_SparseElements], 0, 0, 0, GL_READ_ONLY, GL_R16I); - glBindImageTexture(2, cp->textures[BeamformerComputeTextureKind_FocalVectors], 0, 0, 0, GL_READ_ONLY, GL_RG32F); + glBindImageTexture(1, sparse_texture, 0, 0, 0, GL_READ_ONLY, GL_R16I); + glBindImageTexture(2, cp->textures[BeamformerComputeTextureKind_FocalVectors], 0, 0, 0, GL_READ_ONLY, GL_RG32F); glProgramUniform1ui(program, DAS_CYCLE_T_UNIFORM_LOC, cycle_t++); - if (shader == BeamformerShaderKind_DASFast) { + if (fast) { i32 loop_end; if (ubo->shader_kind == DASShaderKind_RCA_VLS || ubo->shader_kind == DASShaderKind_RCA_TPW) @@ -838,9 +846,9 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame /* IMPORTANT(rnp): prevents OS from coalescing and killing our shader */ glFinish(); glProgramUniform1i(program, DAS_FAST_CHANNEL_UNIFORM_LOC, index); - glDispatchCompute((u32)ceil_f32((f32)frame->dim.x / DAS_FAST_LOCAL_SIZE_X), - (u32)ceil_f32((f32)frame->dim.y / DAS_FAST_LOCAL_SIZE_Y), - (u32)ceil_f32((f32)frame->dim.z / DAS_FAST_LOCAL_SIZE_Z)); + glDispatchCompute((u32)ceil_f32((f32)frame->dim.x / DAS_LOCAL_SIZE_X), + (u32)ceil_f32((f32)frame->dim.y / DAS_LOCAL_SIZE_Y), + (u32)ceil_f32((f32)frame->dim.z / DAS_LOCAL_SIZE_Z)); glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); } } else { @@ -902,118 +910,41 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame } } -function s8 -shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena) +function void +stream_push_shader_header(Stream *s, ShaderReloadContext *ctx) { - Stream sb = arena_stream(*arena); - stream_append_s8s(&sb, s8("#version 460 core\n\n"), ctx->header); + BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + ctx->reloadable_info_index; - switch (ctx->kind) { - case BeamformerShaderKind_Filter: - 
case BeamformerShaderKind_FilterCF: - case BeamformerShaderKind_Demodulate: - case BeamformerShaderKind_DemodulateCF: - case BeamformerShaderKind_DemodulateFloat: - case BeamformerShaderKind_DemodulateFloatCF: - { - stream_append_s8(&sb, s8("" + stream_append_s8s(s, s8("#version 460 core\n\n"), ctx->header); + + switch (rsi->kind) { + case BeamformerShaderKind_Filter:{ + stream_append_s8(s, s8("" "layout(local_size_x = " str(FILTER_LOCAL_SIZE_X) ", " "local_size_y = " str(FILTER_LOCAL_SIZE_Y) ", " "local_size_z = " str(FILTER_LOCAL_SIZE_Z) ") in;\n\n" )); - - switch (ctx->kind) { - case BeamformerShaderKind_FilterCF: - case BeamformerShaderKind_DemodulateCF: - case BeamformerShaderKind_DemodulateFloatCF: - { - stream_append_s8(&sb, s8("#define COMPLEX_FILTER 1\n")); - }break; - default:{ - stream_append_s8(&sb, s8("#define COMPLEX_FILTER 0\n")); - }break; - } - - switch (ctx->kind) { - case BeamformerShaderKind_Filter: - case BeamformerShaderKind_FilterCF: - case BeamformerShaderKind_DemodulateFloat: - case BeamformerShaderKind_DemodulateFloatCF: - { - stream_append_s8(&sb, s8("#define INPUT_DATA_TYPE_FLOAT\n")); - }break; - default:{}break; - } - - switch (ctx->kind) { - case BeamformerShaderKind_Demodulate: - case BeamformerShaderKind_DemodulateCF: - case BeamformerShaderKind_DemodulateFloat: - case BeamformerShaderKind_DemodulateFloatCF: - { - stream_append_s8(&sb, s8("#define DEMODULATE\n")); - }break; - default:{}break; - } - - stream_append_byte(&sb, '\n'); - #define X(k, v, ...) "#define ShaderFlags_" #k " " #v "\n" - stream_append_s8(&sb, s8(FILTER_SHADER_FLAGS_LIST)); - #undef X - - stream_append_byte(&sb, '\n'); - #define X(k, v, ...) 
"#define SamplingMode_" #k " " #v "\n" - stream_append_s8(&sb, s8(SAMPLING_MODES_LIST)); - #undef X - stream_append_byte(&sb, '\n'); }break; - case BeamformerShaderKind_DAS: - case BeamformerShaderKind_DASFast: - { - if (ctx->kind == BeamformerShaderKind_DAS) { - stream_append_s8(&sb, s8("" - "layout(local_size_x = " str(DAS_LOCAL_SIZE_X) ", " - "local_size_y = " str(DAS_LOCAL_SIZE_Y) ", " - "local_size_z = " str(DAS_LOCAL_SIZE_Z) ") in;\n\n" - "#define DAS_FAST 0\n\n" - "layout(location = " str(DAS_VOXEL_OFFSET_UNIFORM_LOC) ") uniform ivec3 u_voxel_offset;\n" - )); - } else { - stream_append_s8(&sb, s8("" - "layout(local_size_x = " str(DAS_FAST_LOCAL_SIZE_X) ", " - "local_size_y = " str(DAS_FAST_LOCAL_SIZE_Y) ", " - "local_size_z = " str(DAS_FAST_LOCAL_SIZE_Z) ") in;\n\n" - "#define DAS_FAST 1\n\n" - "layout(location = " str(DAS_FAST_CHANNEL_UNIFORM_LOC) ") uniform int u_channel;\n" - )); - } - stream_append_s8(&sb, s8("" - "layout(location = " str(DAS_CYCLE_T_UNIFORM_LOC) ") uniform uint u_cycle_t;\n\n")); + case BeamformerShaderKind_DAS:{ + stream_append_s8(s, s8("" + "layout(local_size_x = " str(DAS_LOCAL_SIZE_X) ", " + "local_size_y = " str(DAS_LOCAL_SIZE_Y) ", " + "local_size_z = " str(DAS_LOCAL_SIZE_Z) ") in;\n\n" + "layout(location = " str(DAS_VOXEL_OFFSET_UNIFORM_LOC) ") uniform ivec3 u_voxel_offset;\n" + "layout(location = " str(DAS_CYCLE_T_UNIFORM_LOC) ") uniform uint u_cycle_t;\n" + "layout(location = " str(DAS_FAST_CHANNEL_UNIFORM_LOC) ") uniform int u_channel;\n\n" + )); - #define X(k, v, ...) "#define ShaderFlags_" #k " " #v "\n" - stream_append_s8(&sb, s8(DAS_SHADER_FLAGS_LIST)); + #define X(k, id, ...) "#define ShaderFlags_" #k " " #id "\n" + stream_append_s8s(s, s8(DAS_SHADER_FLAGS_LIST), s8("\n")); #undef X - - stream_append_byte(&sb, '\n'); - #define X(k, id, ...) 
"#define ShaderKind_" #k " " #id "\n" - stream_append_s8(&sb, s8(DAS_SHADER_KIND_LIST)); + stream_append_s8s(s, s8(DAS_SHADER_KIND_LIST), s8("\n")); #undef X }break; - case BeamformerShaderKind_Decode: - case BeamformerShaderKind_DecodeFloat: - case BeamformerShaderKind_DecodeFloatComplex: - case BeamformerShaderKind_DecodeInt16Complex: - case BeamformerShaderKind_DecodeInt16ToFloat: - { - s8 define_table[] = { - [BeamformerShaderKind_DecodeFloatComplex] = s8("#define INPUT_DATA_TYPE_FLOAT_COMPLEX\n\n"), - [BeamformerShaderKind_DecodeFloat] = s8("#define INPUT_DATA_TYPE_FLOAT\n\n"), - [BeamformerShaderKind_DecodeInt16Complex] = s8("#define INPUT_DATA_TYPE_INT16_COMPLEX\n\n"), - [BeamformerShaderKind_DecodeInt16ToFloat] = s8("#define OUTPUT_DATA_TYPE_FLOAT\n\n"), - }; + case BeamformerShaderKind_Decode:{ #define X(type, id, pretty) "#define DECODE_MODE_" #type " " #id "\n" - stream_append_s8s(&sb, define_table[ctx->kind], s8("" + stream_append_s8s(s, s8("" "layout(local_size_x = " str(DECODE_LOCAL_SIZE_X) ", " "local_size_y = " str(DECODE_LOCAL_SIZE_Y) ", " "local_size_z = " str(DECODE_LOCAL_SIZE_Z) ") in;\n\n" @@ -1023,20 +954,27 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena) #undef X }break; case BeamformerShaderKind_MinMax:{ - stream_append_s8(&sb, s8("layout(location = " str(MIN_MAX_MIPS_LEVEL_UNIFORM_LOC) - ") uniform int u_mip_map;\n\n")); + stream_append_s8(s, s8("layout(location = " str(MIN_MAX_MIPS_LEVEL_UNIFORM_LOC) + ") uniform int u_mip_map;\n\n")); }break; case BeamformerShaderKind_Sum:{ - stream_append_s8(&sb, s8("layout(location = " str(SUM_PRESCALE_UNIFORM_LOC) - ") uniform float u_sum_prescale = 1.0;\n\n")); + stream_append_s8(s, s8("layout(location = " str(SUM_PRESCALE_UNIFORM_LOC) + ") uniform float u_sum_prescale = 1.0;\n\n")); }break; default:{}break; } +} + +function s8 +shader_text_with_header(ShaderReloadContext *ctx, s8 filepath, Arena *arena) +{ + Stream sb = arena_stream(*arena); + stream_push_shader_header(&sb, 
ctx); stream_append_s8(&sb, s8("\n#line 1\n")); s8 result = arena_stream_commit(arena, &sb); - if (ctx->path.len) { - s8 file = os_read_whole_file(arena, (c8 *)ctx->path.data); + if (filepath.len > 0) { + s8 file = os_read_whole_file(arena, (c8 *)filepath.data); assert(file.data == result.data + result.len); result.len += file.len; } @@ -1046,6 +984,8 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena) DEBUG_EXPORT BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader) { + BeamformerCtx *ctx = src->beamformer_context; + i32 shader_count = 1; ShaderReloadContext *link = src->link; while (link != src) { shader_count++; link = link->link; } @@ -1055,27 +995,105 @@ DEBUG_EXPORT BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader) i32 index = 0; do { - shader_texts[index] = shader_text_with_header(link, os, &arena); + s8 filepath = {0}; + if (link->reloadable_info_index >= 0) filepath = path; + shader_texts[index] = shader_text_with_header(link, filepath, &arena); shader_types[index] = link->gl_type; index++; link = link->link; } while (link != src); - glDeleteProgram(*src->shader); - *src->shader = load_shader(&ctx->os, arena, shader_texts, shader_types, shader_count, shader_name); - if (src->kind == BeamformerShaderKind_Render3D) ctx->frame_view_render_context.updated = 1; + BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + src->reloadable_info_index; + u32 *shader = ctx->compute_context.programs + rsi->kind; + if (rsi->kind == BeamformerShaderKind_Render3D) + shader = &ctx->frame_view_render_context.shader; + + glDeleteProgram(*shader); + *shader = load_shader(&ctx->os, arena, shader_texts, shader_types, shader_count, shader_name); + if (rsi->kind == BeamformerShaderKind_Render3D) ctx->frame_view_render_context.updated = 1; return 1; } -function b32 -reload_compute_shader(BeamformerCtx *ctx, ShaderReloadContext *src, s8 name_extra, Arena arena) +function void +reload_compute_shader(BeamformerCtx *ctx, 
ShaderReloadContext *src, Arena arena) { - Stream sb = arena_stream(arena); - stream_append_s8s(&sb, src->name, name_extra); - s8 name = arena_stream_commit(&arena, &sb); - b32 result = beamformer_reload_shader(&ctx->os, ctx, src, arena, name); - return result; + BeamformerComputeContext *cc = &ctx->compute_context; + BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + src->reloadable_info_index; + BeamformerShaderDescriptor *sd = beamformer_shader_descriptors + rsi->kind; + + Stream status = stream_alloc(&arena, 128); + u32 completed = 0; + u32 total_shaders = (u32)(sd->one_past_last_match_vector_index - sd->first_match_vector_index); + for (i32 i = 0; i < rsi->sub_shader_descriptor_index_count; i++) { + BeamformerShaderDescriptor *ssd = beamformer_shader_descriptors + rsi->sub_shader_descriptor_indices[i]; + total_shaders += (u32)(ssd->one_past_last_match_vector_index - ssd->first_match_vector_index); + } + + s8 path = push_s8_from_parts(&arena, ctx->os.path_separator, s8("shaders"), + beamformer_reloadable_shader_files[src->reloadable_info_index]); + s8 file_text = os_read_whole_file(&arena, (c8 *)path.data); + Stream shader = arena_stream(arena); + + stream_push_shader_header(&shader, src); + + stream_append_s8(&shader, beamformer_shader_local_header_strings[src->reloadable_info_index]); + + i32 save_point = shader.widx; + for (i32 sub_index = -1; sub_index < rsi->sub_shader_descriptor_index_count; sub_index++) { + shader.widx = save_point; + + if (sub_index != -1) + sd = beamformer_shader_descriptors + rsi->sub_shader_descriptor_indices[sub_index]; + + i32 *hvector = beamformer_shader_header_vectors[sd - beamformer_shader_descriptors]; + for (i32 index = 0; index < sd->match_vector_length; index++) + stream_append_s8s(&shader, beamformer_shader_global_header_strings[hvector[index]], s8("\n")); + + i32 instance_save_point = shader.widx; + arena_commit(&arena, instance_save_point); + TempArena arena_save = begin_temp_arena(&arena); + + for 
(i32 instance = sd->first_match_vector_index; + instance < sd->one_past_last_match_vector_index; + instance++) + { + shader.widx = instance_save_point; + end_temp_arena(arena_save); + + i32 *match_vector = beamformer_shader_match_vectors[instance]; + for (i32 index = 0; index < sd->match_vector_length; index++) { + stream_append_s8s(&shader, s8("#define "), beamformer_shader_descriptor_header_strings[index], s8(" (")); + stream_append_i64(&shader, match_vector[index]); + stream_append_s8(&shader, s8(")\n")); + } + + if (sd->has_local_flags) { + stream_append_s8(&shader, s8("#define ShaderFlags (0x")); + stream_append_hex_u64(&shader, (u64)match_vector[sd->match_vector_length]); + stream_append_s8(&shader, s8(")\n")); + } + + stream_append_s8s(&shader, s8("\n#line 1\n"), file_text); + + arena_commit(&arena, shader.widx - instance_save_point); + + s8 shader_text = stream_to_s8(&shader); + /* TODO(rnp): instance name */ + s8 shader_name = beamformer_shader_names[rsi->kind]; + glDeleteProgram(cc->programs[instance]); + cc->programs[instance] = load_shader(&ctx->os, arena, &shader_text, &src->gl_type, 1, shader_name); + + status.widx = 0; + stream_append_s8s(&status, s8("\r\x1b[2Kloaded shader "), shader_name, s8(": [")); + stream_append_u64(&status, ++completed); + stream_append_s8s(&status, s8("/")); + stream_append_u64(&status, total_shaders); + stream_append_s8s(&status, s8("]")); + os_write_file(ctx->os.error_handle, stream_to_s8(&status)); + } + } + os_write_file(ctx->os.error_handle, s8("\n")); } function void @@ -1089,51 +1107,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c b32 can_commit = 1; switch (work->kind) { case BeamformerWorkKind_ReloadShader:{ - ShaderReloadContext *src = work->shader_reload_context; - b32 success = reload_compute_shader(ctx, src, s8(""), *arena); - /* TODO(rnp): think of a better way of doing this */ - switch (src->kind) { - case BeamformerShaderKind_DAS:{ - src->kind = 
BeamformerShaderKind_DASFast; - src->shader = cs->programs + src->kind; - success &= reload_compute_shader(ctx, src, s8(" (Fast)"), *arena); - - src->kind = BeamformerShaderKind_DAS; - src->shader = cs->programs + src->kind; - }break; - case BeamformerShaderKind_Decode:{ - read_only local_persist struct { BeamformerShaderKind kind; s8 suffix; } derivatives[] = { - #define X(k, __1, __2, suffix, ...) {BeamformerShaderKind_## k, s8_comp(suffix)}, - DECODE_SHADER_VARIATIONS - #undef X - }; - for EachElement(derivatives, it) { - src->kind = derivatives[it].kind; - src->shader = cs->programs + src->kind; - success &= reload_compute_shader(ctx, src, derivatives[it].suffix, *arena); - } - src->kind = BeamformerShaderKind_Decode; - src->shader = cs->programs + src->kind; - }break; - case BeamformerShaderKind_Filter:{ - read_only local_persist struct { BeamformerShaderKind kind; s8 suffix; } derivatives[] = { - {BeamformerShaderKind_Demodulate, s8_comp(" (Demodulate)")}, - #define X(k, __1, __2, suffix, ...) 
{BeamformerShaderKind_## k, s8_comp(suffix)}, - FILTER_SHADER_VARIATIONS - #undef X - }; - for EachElement(derivatives, it) { - src->kind = derivatives[it].kind; - src->shader = cs->programs + src->kind; - success &= reload_compute_shader(ctx, src, derivatives[it].suffix, *arena); - } - src->kind = BeamformerShaderKind_Filter; - src->shader = cs->programs + src->kind; - }break; - default:{}break; - } - - if (success && ctx->latest_frame && !sm->live_imaging_parameters.active) { + reload_compute_shader(ctx, work->shader_reload_context, *arena); + if (ctx->latest_frame && !sm->live_imaging_parameters.active) { fill_frame_compute_work(ctx, work, ctx->latest_frame->view_plane_tag, 0, 0); can_commit = 0; } @@ -1235,7 +1210,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, rf->ssbo, slot * rf->size, rf->size); glBeginQuery(GL_TIME_ELAPSED, cc->shader_timer_ids[0]); - do_compute_shader(ctx, cp, frame, pipeline->shaders[0], pipeline->parameters + 0, *arena); + do_compute_shader(ctx, cp, frame, pipeline->shaders[0], pipeline->program_indices[0], + pipeline->parameters + 0, *arena); glEndQuery(GL_TIME_ELAPSED); if (work->kind == BeamformerWorkKind_ComputeIndirect) { @@ -1249,7 +1225,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c for (u32 i = 1; i < pipeline->shader_count; i++) { did_sum_shader |= pipeline->shaders[i] == BeamformerShaderKind_Sum; glBeginQuery(GL_TIME_ELAPSED, cc->shader_timer_ids[i]); - do_compute_shader(ctx, cp, frame, pipeline->shaders[i], pipeline->parameters + i, *arena); + do_compute_shader(ctx, cp, frame, pipeline->shaders[i], pipeline->program_indices[i], + pipeline->parameters + i, *arena); glEndQuery(GL_TIME_ELAPSED); } diff --git a/beamformer.h b/beamformer.h @@ -6,6 +6,9 @@ #include <rlgl.h> #include "util.h" +#include "opengl.h" + +#include "generated/beamformer.meta.c" /////////////////// // REQUIRED OS API @@ -15,7 +18,6 @@ 
function OS_SHARED_MEMORY_UNLOCK_REGION_FN(os_shared_memory_region_unlock); function OS_WAKE_WAITERS_FN(os_wake_waiters); function OS_WRITE_FILE_FN(os_write_file); -#include "opengl.h" #include "util_gl.c" enum gl_vendor_ids { @@ -106,23 +108,15 @@ typedef struct { } BeamformerFilter; #define DAS_SHADER_FLAGS_LIST \ - X(RxColumns, (1 << 0)) \ - X(TxColumns, (1 << 1)) \ - X(Interpolate, (1 << 2)) \ - X(CoherencyWeighting, (1 << 3)) + X(RxColumns, (1 << 2)) \ + X(TxColumns, (1 << 3)) \ + X(Interpolate, (1 << 4)) \ + X(CoherencyWeighting, (1 << 5)) #define X(k, v, ...) DASShaderFlags_## k = v, typedef enum {DAS_SHADER_FLAGS_LIST} DASShaderFlags; #undef X -static_assert(BeamformerSamplingMode_Count < 4, "filter sample mode mask borked"); -#define FILTER_SHADER_FLAGS_LIST \ - X(SamplingModeMask, ((1 << 0) | (1 << 1))) \ - X(MapChannels, (1 << 2)) -#define X(k, v, ...) FilterShaderFlags_## k = v, -typedef enum {FILTER_SHADER_FLAGS_LIST} FilterShaderFlags; -#undef X - /* X(name, type, gltype) */ #define BEAMFORMER_FILTER_UBO_PARAM_LIST \ X(input_channel_stride, u32, uint) \ @@ -132,7 +126,6 @@ typedef enum {FILTER_SHADER_FLAGS_LIST} FilterShaderFlags; X(output_sample_stride, u32, uint) \ X(output_transmit_stride, u32, uint) \ X(decimation_rate, u32, uint) \ - X(shader_flags, u32, int) \ X(demodulation_frequency, f32, float) \ X(sampling_frequency, f32, float) @@ -174,7 +167,7 @@ typedef alignas(16) struct { #define X(name, type, ...) 
type name; BEAMFORMER_FILTER_UBO_PARAM_LIST #undef X - float _pad[2]; + float _pad[3]; } BeamformerFilterUBO; static_assert((sizeof(BeamformerFilterUBO) & 15) == 0, "UBO size must be a multiple of 16"); @@ -253,7 +246,8 @@ typedef struct { } BeamformerRFBuffer; typedef struct { - u32 programs[BeamformerShaderKind_ComputeCount]; + /* TODO(rnp): slightly oversized; remove non compute shaders from match vectors count */ + u32 programs[beamformer_match_vectors_count]; BeamformerRFBuffer rf_buffer; BeamformerComputePlan *compute_plans[BeamformerMaxParameterBlockSlots]; @@ -400,14 +394,11 @@ typedef struct { } BeamformerCtx; struct ShaderReloadContext { - BeamformerCtx *beamformer_context; - s8 path; - s8 name; - s8 header; - u32 *shader; + BeamformerCtx *beamformer_context; ShaderReloadContext *link; - GLenum gl_type; - BeamformerShaderKind kind; + s8 header; + GLenum gl_type; + i32 reloadable_info_index; }; #define BEAMFORMER_FRAME_STEP_FN(name) void name(BeamformerCtx *ctx, BeamformerInput *input) @@ -419,8 +410,8 @@ typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn); #define BEAMFORMER_RF_UPLOAD_FN(name) void name(BeamformerUploadThreadContext *ctx, Arena arena) typedef BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload_fn); -#define BEAMFORMER_RELOAD_SHADER_FN(name) b32 name(OS *os, BeamformerCtx *ctx, \ - ShaderReloadContext *src, Arena arena, s8 shader_name) +#define BEAMFORMER_RELOAD_SHADER_FN(name) b32 name(OS *os, s8 path, ShaderReloadContext *src, \ + Arena arena, s8 shader_name) typedef BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader_fn); #define BEAMFORMER_DEBUG_UI_DEINIT_FN(name) void name(BeamformerCtx *ctx) diff --git a/beamformer.meta b/beamformer.meta @@ -0,0 +1,45 @@ +@ShaderGroup Compute +{ + @Shader CudaDecode + @Shader CudaHilbert + + @Shader(decode.glsl) Decode + { + @Permute(DataKind [Int16 Int16Complex Float32 Float32Complex]) + } + + @Shader(filter.glsl) Filter + { + @Permute(DataKind [Int16Complex Float32 Float32Complex]) + { 
+ @PermuteBits([MapChannels ComplexFilter]) + } + + @SubShader Demodulate + { + @Permute(DataKind [Int16 Float32]) + { + @Permute(SamplingMode [2X 4X]) + { + @PermuteBits([MapChannels ComplexFilter]) + } + } + } + } + + @Shader(das.glsl) DAS + { + @Permute(DataKind [Float32 Float32Complex]) + { + @PermuteBits([Fast Sparse]) + } + } + + @Shader(min_max.glsl) MinMax + @Shader(sum.glsl) Sum +} + +@ShaderGroup Render +{ + @Shader(render_3d.frag.glsl) Render3D +} diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -10,49 +10,10 @@ * be organized for simple offset access per frame). */ -/* X(enumarant, shader file name, pretty name) */ -#define COMPUTE_SHADERS \ - X(CudaDecode, "", "CUDA Decode") \ - X(CudaHilbert, "", "CUDA Hilbert") \ - X(DAS, "das", "DAS") \ - X(Decode, "decode", "Decode (I16)") \ - X(Filter, "filter", "Filter (F32C)") \ - X(Demodulate, "", "Demodulate (I16)") \ - X(MinMax, "min_max", "Min/Max") \ - X(Sum, "sum", "Sum") - -#define DECODE_SHADER_VARIATIONS \ - X(DecodeInt16Complex, "", "Decode (I16C)", " (I16)") \ - X(DecodeFloat, "", "Decode (F32)", " (F32)") \ - X(DecodeFloatComplex, "", "Decode (F32C)", " (F32C)") \ - X(DecodeInt16ToFloat, "", "Decode (I16-F32)", " (I16-F32)") - -#define FILTER_SHADER_VARIATIONS \ - X(FilterCF, "", "Filter (F32C-CF)", " (F32C-CF)") \ - X(DemodulateCF, "", "Demodulate (I16-CF)", " (I16-CF)") \ - X(DemodulateFloat, "", "Demodulate (F32)", " (F32)") \ - X(DemodulateFloatCF, "", "Demodulate (F32-CF)", " (F32-CF)") - -#define COMPUTE_SHADERS_INTERNAL \ - COMPUTE_SHADERS \ - DECODE_SHADER_VARIATIONS \ - FILTER_SHADER_VARIATIONS \ - X(DASFast, "", "DAS (Fast)") - -typedef enum { - #define X(e, ...) BeamformerShaderKind_##e, - COMPUTE_SHADERS_INTERNAL - #undef X - BeamformerShaderKind_Render3D, - BeamformerShaderKind_Count, - - BeamformerShaderKind_ComputeCount = BeamformerShaderKind_Render3D, -} BeamformerShaderKind; - typedef struct { /* NOTE(rnp): this wants to be iterated on both dimensions. 
it depends entirely on which * visualization method you want to use. the coalescing function wants both directions */ - float times[32][BeamformerShaderKind_Count]; + float times[32][BeamformerShaderKind_ComputeCount]; float rf_time_deltas[32]; } BeamformerComputeStatsTable; @@ -61,11 +22,6 @@ typedef struct { X(NONE, 0, "None") \ X(HADAMARD, 1, "Hadamard") -#define SAMPLING_MODES_LIST \ - X(NS200BW, 0) \ - X(BS100BW, 1) \ - X(BS50BW, 2) - #define TRANSMIT_MODES_LIST \ X(Rows) \ X(Columns) @@ -82,20 +38,6 @@ typedef enum {TRANSMIT_MODES_LIST} BeamformerTransmitModes; typedef enum {RECEIVE_MODES_LIST} BeamformerReceiveModes; #undef X -#define X(k, v, ...) BeamformerSamplingMode_## k = v, -typedef enum {SAMPLING_MODES_LIST BeamformerSamplingMode_Count} BeamformerSamplingModes; -#undef X - -#define BEAMFORMER_DATA_KIND_LIST \ - X(Int16, 0) \ - X(Int16Complex, 1) \ - X(Float32, 2) \ - X(Float32Complex, 3) - -#define X(k, id) BeamformerDataKind_##k = id, -typedef enum {BEAMFORMER_DATA_KIND_LIST} BeamformerDataKind; -#undef X - /* TODO(rnp): this is an absolute abuse of the preprocessor, but now is * not a good time to write a full metaprogram */ #define BEAMFORMER_FILTER_KIND_LIST(type, _) \ @@ -148,10 +90,6 @@ typedef enum { #define DAS_LOCAL_SIZE_Y 1 #define DAS_LOCAL_SIZE_Z 16 -#define DAS_FAST_LOCAL_SIZE_X 16 -#define DAS_FAST_LOCAL_SIZE_Y 1 -#define DAS_FAST_LOCAL_SIZE_Z 16 - #define DAS_VOXEL_OFFSET_UNIFORM_LOC 2 #define DAS_CYCLE_T_UNIFORM_LOC 3 #define DAS_FAST_CHANNEL_UNIFORM_LOC 4 diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c @@ -116,6 +116,7 @@ typedef union { typedef struct { BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages]; BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages]; + u32 program_indices[BeamformerMaxComputeShaderStages]; u32 shader_count; BeamformerDataKind data_kind; } BeamformerComputePipeline; diff --git a/build.c b/build.c @@ -2,6 +2,10 @@ /* NOTE: inspired by nob: 
https://github.com/tsoding/nob.h */ /* TODO(rnp): + * [ ]: refactor: "base" shaders should only be reloadable shaders + * - internally when a shader with no file is encountered it should + * not get pushed as a "base" shader. + * [ ]: bug: column indicator for compile error is off * [ ]: bake shaders and font data into binary * - for shaders there is a way of making a separate data section and referring * to it with extern from the C source (bake both data and size) @@ -11,10 +15,12 @@ * [ ]: seperate dwarf debug info */ #include <stdarg.h> +#include <setjmp.h> #include <stdio.h> #include "util.h" +#define BeamformerShaderKind_ComputeCount (1) #include "beamformer_parameters.h" global char *g_argv0; @@ -105,21 +111,11 @@ global char *g_argv0; #define shift(list, count) ((count)--, *(list)++) -#define da_append_count(a, s, items, item_count) do { \ - da_reserve((a), (s), (s)->count + (item_count)); \ - mem_copy((s)->data + (s)->count, (items), sizeof(*(items)) * (uz)(item_count)); \ - (s)->count += (item_count); \ -} while (0) - #define cmd_append_count da_append_count #define cmd_append(a, s, ...) da_append_count(a, s, ((char *[]){__VA_ARGS__}), \ (iz)(sizeof((char *[]){__VA_ARGS__}) / sizeof(char *))) -typedef struct { - char **data; - iz count; - iz capacity; -} CommandList; +DA_STRUCT(char *, Command); typedef struct { b32 debug; @@ -130,10 +126,11 @@ typedef struct { } Options; #define BUILD_LOG_KINDS \ - X(Error, "\x1B[31m[ERROR]\x1B[0m ") \ - X(Warning, "\x1B[33m[WARNING]\x1B[0m ") \ - X(Info, "\x1B[32m[INFO]\x1B[0m ") \ - X(Command, "\x1B[36m[COMMAND]\x1B[0m ") + X(Error, "\x1B[31m[ERROR]\x1B[0m ") \ + X(Warning, "\x1B[33m[WARNING]\x1B[0m ") \ + X(Generate, "\x1B[32m[GENERATE]\x1B[0m ") \ + X(Info, "\x1B[33m[INFO]\x1B[0m ") \ + X(Command, "\x1B[36m[COMMAND]\x1B[0m ") #define X(t, ...) 
BuildLogKind_##t, typedef enum {BUILD_LOG_KINDS BuildLogKind_Count} BuildLogKind; #undef X @@ -152,9 +149,10 @@ build_log_base(BuildLogKind kind, char *format, va_list args) #define build_log_failure(format, ...) build_log(BuildLogKind_Error, \ "failed to build: " format, ##__VA_ARGS__) -#define build_log_info(...) build_log(BuildLogKind_Info, ##__VA_ARGS__) -#define build_log_command(...) build_log(BuildLogKind_Command, ##__VA_ARGS__) -#define build_log_warning(...) build_log(BuildLogKind_Warning, ##__VA_ARGS__) +#define build_log_generate(...) build_log(BuildLogKind_Generate, ##__VA_ARGS__) +#define build_log_info(...) build_log(BuildLogKind_Info, ##__VA_ARGS__) +#define build_log_command(...) build_log(BuildLogKind_Command, ##__VA_ARGS__) +#define build_log_warning(...) build_log(BuildLogKind_Warning, ##__VA_ARGS__) function void build_log(BuildLogKind kind, char *format, ...) { @@ -271,7 +269,7 @@ os_wait_close_process(iptr handle) } } else { /* TODO(rnp): handle multiple children */ - INVALID_CODE_PATH; + InvalidCodePath; } } return result; @@ -661,23 +659,6 @@ build_helper_library(Arena arena, CommandList cc) b32 result = build_shared_library(arena, cc, "ogl_beamformer_lib", library, libs, libs_count, arg_list(char *, "helpers/ogl_beamformer_lib.c")); - - ///////////// - // header - char *lib_header_out = OUTPUT("ogl_beamformer_lib.h"); - if (needs_rebuild(lib_header_out, "helpers/ogl_beamformer_lib_base.h")) { - s8 parameters_header = os_read_whole_file(&arena, "beamformer_parameters.h"); - s8 base_header = os_read_whole_file(&arena, "helpers/ogl_beamformer_lib_base.h"); - result = parameters_header.len != 0 && base_header.len != 0 && - parameters_header.data + parameters_header.len == base_header.data; - if (result) { - s8 output_file = parameters_header; - output_file.len += base_header.len; - result &= os_write_new_file(lib_header_out, output_file); - } - if (!result) build_log_failure("%s", lib_header_out); - } - return result; } @@ -723,6 +704,16 @@ 
typedef struct { iz capacity; } s8_list; +function s8 +s8_chop(s8 *in, iz count) +{ + count = CLAMP(count, 0, in->len); + s8 result = {.data = in->data, .len = count}; + in->data += count; + in->len -= count; + return result; +} + function void s8_split(s8 str, s8 *left, s8 *right, u8 byte) { @@ -759,6 +750,7 @@ s8_list_from_s8(s8_list *list, Arena *arena, s8 str) typedef struct { Stream stream; + Arena scratch; i32 indentation_level; } MetaprogramContext; @@ -772,13 +764,6 @@ meta_write_and_reset(MetaprogramContext *m, char *file) return result; } -function void -meta_indent(MetaprogramContext *m) -{ - for (i32 count = m->indentation_level; count > 0; count--) - stream_append_byte(&m->stream, '\t'); -} - #define meta_push(m, ...) meta_push_(m, arg_list(s8, __VA_ARGS__)) function void meta_push_(MetaprogramContext *m, s8 *items, iz count) @@ -786,11 +771,15 @@ meta_push_(MetaprogramContext *m, s8 *items, iz count) stream_append_s8s_(&m->stream, items, count); } +#define meta_pad(m, b, n) stream_pad(&(m)->stream, (b), (n)) +#define meta_indent(m) meta_pad((m), '\t', (m)->indentation_level) #define meta_begin_line(m, ...) do { meta_indent(m); meta_push(m, __VA_ARGS__); } while(0) -#define meta_end_line(m, ...) do { meta_push(m, __VA_ARGS__, s8("\n")); } while(0) +#define meta_end_line(m, ...) meta_push(m, __VA_ARGS__, s8("\n")) #define meta_push_line(m, ...) do { meta_indent(m); meta_push(m, __VA_ARGS__, s8("\n")); } while(0) #define meta_begin_scope(m, ...) do { meta_push_line(m, __VA_ARGS__); (m)->indentation_level++; } while(0) #define meta_end_scope(m, ...) do { (m)->indentation_level--; meta_push_line(m, __VA_ARGS__); } while(0) +#define meta_push_u64(m, n) stream_append_u64(&(m)->stream, (n)) +#define meta_push_u64_hex(m, n) stream_append_hex_u64(&(m)->stream, (n)) #define meta_begin_matlab_class_cracker(_1, _2, FN, ...) 
FN #define meta_begin_matlab_class_1(m, name) meta_begin_scope(m, s8("classdef " name)) @@ -828,135 +817,1613 @@ meta_end_and_write_matlab(MetaprogramContext *m, char *path) return result; } -function b32 -build_matlab_bindings(Arena arena) +#define META_ENTRY_KIND_LIST \ + X(BeginScope) \ + X(EndScope) \ + X(Permute) \ + X(PermuteBits) \ + X(Shader) \ + X(ShaderGroup) \ + X(SubShader) + +#define X(k, ...) MetaEntryKind_## k, +typedef enum {META_ENTRY_KIND_LIST} MetaEntryKind; +#undef X + +#define X(k, ...) #k, +read_only global char *meta_entry_kind_strings[] = {META_ENTRY_KIND_LIST}; +#undef X + +typedef struct { u32 line, column; } MetaLocation; + +#define META_ENTRY_ARGUMENT_KIND_LIST \ + X(None) \ + X(String) \ + X(Array) + +#define X(k, ...) MetaEntryArgumentKind_## k, +typedef enum {META_ENTRY_ARGUMENT_KIND_LIST} MetaEntryArgumentKind; +#undef X + +typedef struct { + MetaEntryArgumentKind kind; + MetaLocation location; + union { + s8 string; + struct { + s8 *strings; + u64 count; + }; + }; +} MetaEntryArgument; + +typedef struct { + MetaEntryKind kind; + u32 argument_count; + MetaEntryArgument *arguments; + s8 name; + MetaLocation location; +} MetaEntry; + +typedef struct { + MetaEntry *data; + iz count; + iz capacity; + s8 raw; +} MetaEntryStack; + +#define META_PARSE_TOKEN_LIST \ + X('@', Entry) \ + X('(', BeginArgs) \ + X(')', EndArgs) \ + X('[', BeginArray) \ + X(']', EndArray) \ + X('{', BeginScope) \ + X('}', EndScope) + +typedef enum { + MetaParseToken_EOF, + MetaParseToken_String, + #define X(__1, kind, ...) 
MetaParseToken_## kind, + META_PARSE_TOKEN_LIST + #undef X + MetaParseToken_Count, +} MetaParseToken; + +typedef union { + MetaEntryKind kind; + s8 string; +} MetaParseUnion; + +typedef struct { + s8 s; + MetaLocation location; +} MetaParsePoint; + +typedef struct { + MetaParsePoint p; + MetaParseUnion u; + MetaParsePoint save_point; +} MetaParser; + +global char *compiler_file; +global jmp_buf compiler_jmp_buf; + +#define meta_parser_save(v) (v)->save_point = (v)->p +#define meta_parser_restore(v) swap((v)->p, (v)->save_point) +#define meta_parser_commit(v) meta_parser_restore(v) + +#define meta_compiler_error_message(loc, format, ...) \ + fprintf(stderr, "%s:%u:%u: error: "format, compiler_file, \ + loc.line + 1, loc.column + 1, ##__VA_ARGS__) + +#define meta_compiler_error(loc, format, ...) do { \ + meta_compiler_error_message(loc, format, ##__VA_ARGS__); \ + meta_error(); \ +} while (0) + +#define meta_entry_error(e, ...) meta_entry_error_column((e), (i32)(e)->location.column, __VA_ARGS__) +#define meta_entry_error_column(e, column, ...) do { \ + meta_compiler_error_message((e)->location, __VA_ARGS__); \ + meta_entry_print((e), 1, (column)); \ + meta_error(); \ +} while(0) + +#define meta_entry_error_location(e, loc, ...) 
do { \ + meta_compiler_error_message((loc), __VA_ARGS__); \ + meta_entry_print((e), 1, (i32)(loc).column); \ + meta_error(); \ +} while (0) + +function no_return void +meta_error(void) { - b32 result = 1; - os_make_directory(OUTPUT("matlab")); + assert(0); + longjmp(compiler_jmp_buf, 1); +} - Arena scratch = sub_arena(&arena, MB(1), 16); +function void +meta_entry_print(MetaEntry *e, i32 depth, i32 caret) +{ + char *kind = meta_entry_kind_strings[e->kind]; + if (e->kind == MetaEntryKind_BeginScope) kind = "{"; + if (e->kind == MetaEntryKind_EndScope) kind = "}"; + + fprintf(stderr, "%*s@%s", depth * 2, "", kind); + + if (e->argument_count) { + fprintf(stderr, "("); + for (u32 i = 0; i < e->argument_count; i++) { + MetaEntryArgument *a = e->arguments + i; + if (i != 0) fprintf(stderr, " "); + if (a->kind == MetaEntryArgumentKind_Array) { + fprintf(stderr, "["); + for (u64 j = 0; j < a->count; j++) { + if (j != 0) fprintf(stderr, " "); + fprintf(stderr, "%.*s", (i32)a->strings[j].len, a->strings[j].data); + } + fprintf(stderr, "]"); + } else { + fprintf(stderr, "%.*s", (i32)a->string.len, a->string.data); + } + } + fprintf(stderr, ")"); + } + if (e->name.len) fprintf(stderr, " %.*s", (i32)e->name.len, e->name.data); - char *out = OUTPUT("matlab/OGLBeamformerLiveFeedbackFlags.m"); - if (needs_rebuild(out, "beamformer_parameters.h")) { - /* TODO(rnp): recreate/clear directory incase these file names change */ - MetaprogramContext m = {.stream = arena_stream(arena)}; + if (caret >= 0) fprintf(stderr, "\n%.*s^", depth * 2 + caret, ""); - #define X(name, flag, ...) 
meta_push_line(&m, s8(#name " (" str(flag) ")")); - meta_begin_matlab_class(&m, "OGLBeamformerLiveFeedbackFlags", "int32"); - meta_begin_scope(&m, s8("enumeration")); - BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST - result &= meta_end_and_write_matlab(&m, out); + fprintf(stderr, "\n"); +} - meta_begin_matlab_class(&m, "OGLBeamformerDataKind", "int32"); - meta_begin_scope(&m, s8("enumeration")); - BEAMFORMER_DATA_KIND_LIST - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerDataKind.m")); - #undef X +function MetaEntryKind +meta_entry_kind_from_string(s8 s) +{ + #define X(k, ...) s8_comp(#k), + read_only local_persist s8 kinds[] = {META_ENTRY_KIND_LIST}; + #undef X + i32 result = -1; + for EachElement(kinds, it) { + if (s8_equal(kinds[it], s)) { + result = (i32)it; + break; + } + } + return (MetaEntryKind)result; +} - #define X(kind, ...) meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerFilterKind_## kind); - meta_begin_matlab_class(&m, "OGLBeamformerFilterKind", "int32"); - meta_begin_scope(&m, s8("enumeration")); - BEAMFORMER_FILTER_KIND_LIST(,) - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerFilterKind.m")); - #undef X +function void +meta_parser_trim(MetaParser *p) +{ + u8 *s, *end = p->p.s.data + p->p.s.len; + b32 done = 0; + b32 comment = 0; + for (s = p->p.s.data; !done && s != end;) { + switch (*s) { + case '\r': case '\t': case ' ': + { + p->p.location.column++; + }break; + case '\n':{ p->p.location.line++; p->p.location.column = 0; comment = 0; }break; + case '/':{ + comment = ((s + 1) != end && s[1] == '/'); + } /* FALLTHROUGH */ + default:{done = !comment;}break; + } + if (!done) s++; + } + p->p.s.data = s; + p->p.s.len = end - s; +} - #define X(kind, ...) 
meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerShaderKind_## kind); - meta_begin_matlab_class(&m, "OGLBeamformerShaderStage", "int32"); - meta_begin_scope(&m, s8("enumeration")); - COMPUTE_SHADERS - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerShaderStage.m")); +function s8 +meta_parser_extract_string(MetaParser *p) +{ + s8 result = {.data = p->p.s.data}; + for (; result.len < p->p.s.len; result.len++) { + b32 done = 0; + switch (p->p.s.data[result.len]) { + #define X(t, ...) case t: + META_PARSE_TOKEN_LIST #undef X + case ' ': case '\n': case '\r': case '\t': + {done = 1;}break; + case '/':{ + done = (result.len + 1 < p->p.s.len) && (p->p.s.data[result.len + 1] == '/'); + }break; + default:{}break; + } + if (done) break; + } + p->p.location.column += (u32)result.len; + p->p.s.data += result.len; + p->p.s.len -= result.len; + return result; +} - #define X(kind, ...) meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerTransmitMode_## kind); - meta_begin_matlab_class(&m, "OGLBeamformerTransmitModes", "int32"); - meta_begin_scope(&m, s8("enumeration")); - TRANSMIT_MODES_LIST - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerTransmitModes.m")); +function s8 +meta_parser_token_name(MetaParser *p, MetaParseToken t) +{ + s8 result = s8("\"invalid\""); + read_only local_persist s8 names[MetaParseToken_Count] = { + [MetaParseToken_EOF] = s8_comp("\"EOF\""), + #define X(k, v, ...) [MetaParseToken_## v] = s8_comp(#k), + META_PARSE_TOKEN_LIST #undef X + }; + if (t >= 0 && t < countof(names)) result = names[t]; + if (t == MetaParseToken_String) result = p->u.string; + return result; +} - #define X(kind, ...) 
meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerReceiveMode_## kind); - meta_begin_matlab_class(&m, "OGLBeamformerReceiveModes", "int32"); - meta_begin_scope(&m, s8("enumeration")); - RECEIVE_MODES_LIST - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerReceiveModes.m")); +function MetaParseToken +meta_parser_token(MetaParser *p) +{ + MetaParseToken result = MetaParseToken_EOF; + meta_parser_save(p); + if (p->p.s.len > 0) { + b32 chop = 1; + switch (p->p.s.data[0]) { + #define X(t, kind, ...) case t:{ result = MetaParseToken_## kind; }break; + META_PARSE_TOKEN_LIST #undef X + default:{ result = MetaParseToken_String; chop = 0; }break; + } + if (chop) { s8_chop(&p->p.s, 1); p->p.location.column++; } - #define X(kind, v, ...) meta_push_line(&m, s8(#kind " (" #v ")")); - meta_begin_matlab_class(&m, "OGLBeamformerSamplingModes", "int32"); - meta_begin_scope(&m, s8("enumeration")); - SAMPLING_MODES_LIST - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerSamplingModes.m")); - #undef X + meta_parser_trim(p); + switch (result) { + case MetaParseToken_String:{ p->u.string = meta_parser_extract_string(p); }break; + + /* NOTE(rnp): '{' and '}' are shorthand for @BeginScope and @EndScope */ + case MetaParseToken_BeginScope:{ p->u.kind = MetaEntryKind_BeginScope; }break; + case MetaParseToken_EndScope:{ p->u.kind = MetaEntryKind_EndScope; }break; + + case MetaParseToken_Entry:{ + s8 kind = meta_parser_extract_string(p); + p->u.kind = meta_entry_kind_from_string(kind); + if (p->u.kind < 0) { + meta_compiler_error(p->p.location, "invalid meta kind: %.*s\n", (i32)kind.len, kind.data); + } + }break; + default:{}break; + } + meta_parser_trim(p); + } + + return result; +} + +function MetaParseToken +meta_parser_peek_token(MetaParser *p) +{ + MetaParseToken result = meta_parser_token(p); + meta_parser_restore(p); + return result; +} + +function void +meta_parser_unexpected_token(MetaParser *p, MetaParseToken t) +{ + 
meta_parser_restore(p); + s8 token_name = meta_parser_token_name(p, t); + meta_compiler_error(p->p.location, "unexpected token: %.*s\n", (i32)token_name.len, token_name.data); +} + +function void +meta_parser_arguments(MetaParser *p, MetaEntry *e, Arena *arena) +{ + if (meta_parser_peek_token(p) == MetaParseToken_BeginArgs) { + meta_parser_commit(p); + + MetaEntryArgument *arg = e->arguments = push_struct(arena, MetaEntryArgument); + b32 array = 0; + for (MetaParseToken token = meta_parser_token(p); + token != MetaParseToken_EndArgs; + token = meta_parser_token(p)) + { + if (!arg) arg = push_struct(arena, MetaEntryArgument); + switch (token) { + case MetaParseToken_String:{ + if (array) { + assert((u8 *)(arg->strings + arg->count) == arena->beg); + *push_struct(arena, s8) = p->u.string; + arg->count++; + } else { + e->argument_count++; + arg->kind = MetaEntryArgumentKind_String; + arg->string = p->u.string; + arg->location = p->p.location; + arg = 0; + } + }break; + case MetaParseToken_BeginArray:{ + arg->kind = MetaEntryArgumentKind_Array; + arg->strings = (s8 *)arena_aligned_start(*arena, alignof(s8)); + arg->location = p->p.location; + array = 1; + }break; + case MetaParseToken_EndArray:{ + e->argument_count++; + array = 0; + arg = 0; + }break; + default:{ meta_parser_unexpected_token(p, token); }break; + } + } + } +} + +function MetaEntryStack +meta_entry_stack_from_file(Arena *arena, Arena scratch, char *file) +{ + MetaParser parser = {.p.s = os_read_whole_file(arena, file)}; + MetaEntryStack result = {.raw = parser.p.s}; + + compiler_file = file; + + meta_parser_trim(&parser); + + for (MetaParseToken token = meta_parser_token(&parser); + token != MetaParseToken_EOF; + token = meta_parser_token(&parser)) + { + MetaEntry *e = da_push(arena, &result); + switch (token) { + case MetaParseToken_BeginScope: + case MetaParseToken_EndScope: + case MetaParseToken_Entry: + { + e->kind = parser.u.kind; + e->location = parser.save_point.location; + + if (token == 
MetaParseToken_Entry) + meta_parser_arguments(&parser, e, arena); + + if (meta_parser_peek_token(&parser) == MetaParseToken_String) { + meta_parser_commit(&parser); + e->name = parser.u.string; + } + }break; + + default:{ meta_parser_unexpected_token(&parser, token); }break; + } + } + + return result; +} + +#define meta_entry_argument_expected(e, ...) \ + meta_entry_argument_expected_((e), arg_list(s8, __VA_ARGS__)) +function void +meta_entry_argument_expected_(MetaEntry *e, s8 *args, uz count) +{ + if (e->argument_count != count) { + meta_compiler_error_message(e->location, "incorrect argument count for entry %s() got: %u expected: %u\n", + meta_entry_kind_strings[e->kind], e->argument_count, (u32)count); + fprintf(stderr, " format: @%s(", meta_entry_kind_strings[e->kind]); + for (uz i = 0; i < count; i++) { + if (i != 0) fprintf(stderr, ", "); + fprintf(stderr, "%.*s", (i32)args[i].len, args[i].data); + } + fprintf(stderr, ")\n"); + meta_error(); + } +} + +function MetaEntryArgument +meta_entry_argument_expect(MetaEntry *e, u32 index, MetaEntryArgumentKind kind) +{ + #define X(k, ...) 
#k, + read_only local_persist char *kinds[] = {META_ENTRY_ARGUMENT_KIND_LIST}; + #undef X + + assert(e->argument_count > index); + MetaEntryArgument result = e->arguments[index]; + + if (result.kind != kind) { + meta_entry_error_location(e, result.location, "unexpected argument kind: expected %s but got: %s\n", + kinds[kind], kinds[result.kind]); + } + + if (kind == MetaEntryArgumentKind_Array && result.count == 0) + meta_entry_error_location(e, result.location, "array arguments must have at least 1 element\n"); + + return result; +} + +typedef struct { + s8_list *data; + iz count; + iz capacity; +} s8_list_table; + +typedef struct { + iz kind; + iz variation; +} MetaPermutation; + +typedef struct { + u32 *data; + iz count; + iz capacity; +} MetaIDList; + +typedef struct { + u32 *global_flags; + u8 *local_flags; + u8 global_flags_count; + u8 local_flags_count; +} MetaShaderPermutation; +DA_STRUCT(MetaShaderPermutation, MetaShaderPermutation); + +typedef struct { + MetaShaderPermutationList permutations; + MetaIDList global_flag_ids; + u32 base_name_id; + u32 flag_list_id; +} MetaShader; +DA_STRUCT(MetaShader, MetaShader); + +typedef struct { + MetaShader *shader; + MetaIDList sub_shaders; + s8 file; +} MetaBaseShader; +DA_STRUCT(MetaBaseShader, MetaBaseShader); + +typedef struct { + i32 first_match_vector_index; + i32 one_past_last_match_vector_index; + i32 sub_field_count; + b32 has_local_flags; +} MetaShaderDescriptor; + +typedef struct { + s8 name; + MetaIDList shaders; +} MetaShaderGroup; +DA_STRUCT(MetaShaderGroup, MetaShaderGroup); + +typedef struct { + Arena *arena, scratch; + + s8_list permutation_kinds; + s8_list_table permutations_for_kind; + + s8_list_table flags_for_shader; + + MetaShaderGroupList shader_groups; + MetaShaderList shaders; + MetaBaseShaderList base_shaders; + s8_list shader_names; + + MetaShaderDescriptor *shader_descriptors; +} MetaContext; + +function iz +meta_lookup_string_slow(s8_list *sv, s8 s) +{ + // TODO(rnp): obviously this is 
slow + iz result = -1; + for (iz i = 0; i < sv->count; i++) { + if (s8_equal(s, sv->data[i])) { + result = i; + break; + } + } + return result; +} + +function iz +meta_lookup_id_slow(MetaIDList *v, u32 id) +{ + // TODO(rnp): obviously this is slow + iz result = -1; + for (iz i = 0; i < v->count; i++) { + if (id == v->data[i]) { + result = i; + break; + } + } + return result; +} + +function iz +meta_intern_string(MetaContext *ctx, s8_list *sv, s8 s) +{ + iz result = meta_lookup_string_slow(sv, s); + if (result < 0) { + *da_push(ctx->arena, sv) = s; + result = sv->count - 1; + } + return result; +} + +function iz +meta_intern_id(MetaContext *ctx, MetaIDList *v, u32 id) +{ + iz result = meta_lookup_id_slow(v, id); + if (result < 0) { + *da_push(ctx->arena, v) = id; + result = v->count - 1; + } + return result; +} + +function MetaPermutation +meta_commit_permutation(MetaContext *ctx, s8 kind, s8 variation) +{ + iz kidx = meta_intern_string(ctx, &ctx->permutation_kinds, kind); + if (ctx->permutation_kinds.count != ctx->permutations_for_kind.count) { + da_push(ctx->arena, &ctx->permutations_for_kind); + assert(kidx == (ctx->permutations_for_kind.count - 1)); + } + + iz vidx = meta_intern_string(ctx, ctx->permutations_for_kind.data + kidx, variation); + MetaPermutation result = {.kind = kidx, .variation = vidx}; + return result; +} + +function u16 +meta_pack_shader_name(MetaContext *ctx, s8 base_name, MetaLocation loc) +{ + iz result = meta_intern_string(ctx, &ctx->shader_names, base_name); + if (result > (iz)U16_MAX) + meta_compiler_error(loc, "maximum base shaders exceeded: limit: %lu\n", U16_MAX); + return (u16)result; +} + +function u8 +meta_commit_shader_flag(MetaContext *ctx, u32 flag_list_id, s8 flag, MetaEntry *e) +{ + assert(flag_list_id < ctx->flags_for_shader.count); + iz index = meta_intern_string(ctx, ctx->flags_for_shader.data + flag_list_id, flag); + if (index > 7) meta_entry_error(e, "Shaders only support 8 local flags\n"); + u8 result = (u8)index; + 
return result; +} + +typedef struct { + u16 entry_id; + struct {u8 current; u8 target;} cursor; + u32 permutation_id; +} MetaShaderPermutationStackFrame; + +typedef struct { + MetaEntry *base_entry; + + MetaShaderPermutationStackFrame *data; + iz count; + iz capacity; +} MetaShaderPermutationStack; + +function void +meta_pack_shader_permutation(MetaContext *ctx, MetaShaderPermutation *sp, MetaShader *base_shader, + u32 local_flags, MetaShaderPermutationStack *stack, MetaEntry *last, + u32 frame_cursor) +{ + //////////////////////////////////// + // NOTE: fill ids from up the stack + u32 local_flag_index = 0; + u32 global_flag_index = 0; + for (iz i = 0; i < stack->count; i++) { + MetaShaderPermutationStackFrame *f = stack->data + i; + MetaEntry *e = stack->base_entry + f->entry_id; + MetaEntryArgument *a = e->arguments; + u32 cursor = f->cursor.current; + switch (e->kind) { + case MetaEntryKind_PermuteBits:{ + if (f->permutation_id == U32_MAX) + f->permutation_id = meta_commit_shader_flag(ctx, base_shader->flag_list_id, a->strings[cursor], e); + sp->local_flags[local_flag_index++] = (u8)(1u << f->permutation_id); + }break; + case MetaEntryKind_Permute:{ + if (f->permutation_id == U32_MAX) { + MetaPermutation p = meta_commit_permutation(ctx, a[0].string, a[1].strings[cursor]); + f->permutation_id = ((u32)(p.kind & 0xFFFFu) << 16) | (u32)(p.variation & 0xFFFFu); + meta_intern_id(ctx, &base_shader->global_flag_ids, (u32)p.kind); + } + sp->global_flags[global_flag_index++] = f->permutation_id; + }break; + InvalidDefaultCase; + } + } + + /////////////////////////////////// + // NOTE: fill ids from stack frame + MetaEntryArgument *a = last->arguments; + switch (last->kind) { + case MetaEntryKind_PermuteBits:{ + u32 packed = local_flags; + u32 test = frame_cursor; + for EachBit(test, flag) { + u32 flag_index = meta_commit_shader_flag(ctx, base_shader->flag_list_id, a->strings[flag], last); + packed |= (1u << flag_index); + } + sp->local_flags[local_flag_index++] = 
(u8)packed; + }break; + case MetaEntryKind_Permute:{ + MetaPermutation p = meta_commit_permutation(ctx, a[0].string, a[1].strings[frame_cursor]); + u32 packed = ((u32)(p.kind & 0xFFFFu) << 16) | (u32)(p.variation & 0xFFFFu); + sp->global_flags[global_flag_index++] = packed; + meta_intern_id(ctx, &base_shader->global_flag_ids, (u32)p.kind); + }break; + InvalidDefaultCase; + } +} + +function void +meta_pop_and_pack_shader_permutations(MetaContext *ctx, MetaShader *base_shader, u32 local_flags, + MetaShaderPermutationStack *stack) +{ + assert(stack->count > 0); + + u32 global_flag_count = 0; + u32 local_flag_count = 0; + + for (iz i = 0; i < stack->count; i++) { + switch (stack->base_entry[stack->data[i].entry_id].kind) { + case MetaEntryKind_PermuteBits:{ local_flag_count++; }break; + case MetaEntryKind_Permute:{ global_flag_count++; }break; + InvalidDefaultCase; + } + } + + MetaShaderPermutationStackFrame *f = stack->data + (--stack->count); + MetaEntry *last = stack->base_entry + f->entry_id; + assert(f->cursor.current == 0); + for (u32 cursor = 0; cursor < f->cursor.target; cursor++) { + MetaShaderPermutation *sp = da_push(ctx->arena, &base_shader->permutations); + sp->global_flags_count = (u8)global_flag_count; + sp->local_flags_count = (u8)local_flag_count; + sp->global_flags = push_array(ctx->arena, typeof(*sp->global_flags), global_flag_count); + sp->local_flags = push_array(ctx->arena, typeof(*sp->local_flags), local_flag_count); + + meta_pack_shader_permutation(ctx, sp, base_shader, local_flags, stack, last, cursor); + } +} + +function void +meta_emit_shader_permutations(MetaContext *ctx, Arena scratch, MetaShader *s, u32 local_flags, + MetaEntry *entries, iz entry_count) +{ + assert(entry_count > 0); + assert(entries[0].kind == MetaEntryKind_Permute || + entries[0].kind == MetaEntryKind_PermuteBits || + entries[0].kind == MetaEntryKind_SubShader); + + MetaShaderPermutationStack stack = {.base_entry = entries}; + da_reserve(&scratch, &stack, 32); + + b32 
done = 0; + for (iz i = 0; i < entry_count && !done; i++) { + MetaEntry *e = entries + i; + switch (e->kind) { + case MetaEntryKind_PermuteBits: + case MetaEntryKind_Permute: + { + if (stack.count && stack.data[stack.count - 1].entry_id == (u16)i) { + MetaShaderPermutationStackFrame *f = stack.data + (stack.count - 1); + f->permutation_id = U32_MAX; + f->cursor.current++; + if (f->cursor.current == f->cursor.target) { + stack.count--; + done = stack.count == 0; + } + } else { + u8 target; + if (e->kind == MetaEntryKind_Permute) { + meta_entry_argument_expected(e, s8("kind"), s8("[id ...]")); + target = (u8)meta_entry_argument_expect(e, 1, MetaEntryArgumentKind_Array).count; + } else { + meta_entry_argument_expected(e, s8("[id ...]")); + u32 count = (u32)meta_entry_argument_expect(e, 0, MetaEntryArgumentKind_Array).count; + target = (u8)(2u << (count - 1)); + } + *da_push(&scratch, &stack) = (MetaShaderPermutationStackFrame){ + .entry_id = (u16)i, + .permutation_id = U32_MAX, + .cursor.target = target, + }; + } + }break; + case MetaEntryKind_SubShader:{}break; + case MetaEntryKind_BeginScope:{}break; + case MetaEntryKind_EndScope:{ + meta_pop_and_pack_shader_permutations(ctx, s, local_flags, &stack); + if (stack.count != 0) + i = stack.data[stack.count - 1].entry_id - 1; + }break; + InvalidDefaultCase; + } + } + if (stack.count) { + assert(stack.count == 1); + meta_pop_and_pack_shader_permutations(ctx, s, local_flags, &stack); + } +} + +function iz +meta_pack_shader(MetaContext *ctx, MetaShaderGroup *sg, Arena scratch, MetaEntry *entries, iz entry_count) +{ + assert(entries[0].kind == MetaEntryKind_Shader); + + MetaBaseShader *base_shader = da_push(ctx->arena, &ctx->base_shaders); + MetaShader *s = da_push(ctx->arena, &ctx->shaders); + *da_push(ctx->arena, &sg->shaders) = (u32)da_index(s, &ctx->shaders); + { + s8_list *flag_list = da_push(ctx->arena, &ctx->flags_for_shader); + s->flag_list_id = (u32)da_index(flag_list, &ctx->flags_for_shader); + } + + 
base_shader->shader = s; + if (entries->argument_count > 1) { + meta_entry_argument_expected(entries, s8("[file_name]")); + } else if (entries->argument_count == 1) { + base_shader->file = meta_entry_argument_expect(entries, 0, MetaEntryArgumentKind_String).string; + } + s->base_name_id = meta_pack_shader_name(ctx, entries->name, entries->location); + + i32 stack_items[32]; + struct { i32 *data; iz capacity; iz count; } stack = {stack_items, countof(stack_items), 0}; + + iz result; + b32 in_sub_shader = 0; + for (result = 0; result < entry_count; result++) { + MetaEntry *e = entries + result; + switch (e->kind) { + case MetaEntryKind_BeginScope:{}break; + case MetaEntryKind_SubShader:{ + if (in_sub_shader) goto error; + in_sub_shader = 1; + } /* FALLTHROUGH */ + case MetaEntryKind_PermuteBits: + case MetaEntryKind_Permute: + case MetaEntryKind_Shader: + { + *da_push(&scratch, &stack) = (i32)result; + if ((result + 1 < entry_count) && entries[result + 1].kind == MetaEntryKind_BeginScope) + break; + } /* FALLTHROUGH */ + case MetaEntryKind_EndScope:{ + i32 index = stack.data[--stack.count]; + MetaEntry *ended = entries + index; + if (index == 0) { + assert(stack.count == 0 && ended->kind == MetaEntryKind_Shader); + // NOTE(rnp): emit an empty single permutation + if (s->permutations.count == 0) + da_push(ctx->arena, &s->permutations); + } else { + u32 local_flags = 0; + if (stack.count > 0 && entries[stack.data[stack.count - 1]].kind == MetaEntryKind_Shader) { + MetaShader *fill = s; + if (ended->kind == MetaEntryKind_SubShader) { + fill = da_push(ctx->arena, &ctx->shaders); + u32 sid = (u32)da_index(fill, &ctx->shaders); + *da_push(ctx->arena, &sg->shaders) = sid; + *da_push(ctx->arena, &base_shader->sub_shaders) = sid; + + fill->flag_list_id = s->flag_list_id; + fill->base_name_id = meta_pack_shader_name(ctx, ended->name, ended->location); + local_flags = 1u << meta_commit_shader_flag(ctx, s->flag_list_id, ended->name, ended); + in_sub_shader = 0; + } + 
meta_emit_shader_permutations(ctx, scratch, fill, local_flags, ended, result - index + 1); + } + } + }break; + + default: + error: + { + meta_entry_error(e, "invalid nested @%s() in @%s()\n", + meta_entry_kind_strings[e->kind], + meta_entry_kind_strings[MetaEntryKind_Shader]); + }break; + } + if (stack.count == 0) + break; + } + + return result; +} + +function MetaPermutation +metagen_unpack_permutation(MetaContext *ctx, u32 packed) +{ + MetaPermutation result; + result.kind = (iz)(packed >> 16u); + result.variation = (iz)(packed & 0xFFFFu); + assert(result.kind < ctx->permutation_kinds.count); + assert(result.variation < ctx->permutations_for_kind.data[result.kind].count); + return result; +} + +function s8 +metagen_permutation_kind(MetaContext *ctx, u32 packed) +{ + MetaPermutation p = metagen_unpack_permutation(ctx, packed); + s8 result = ctx->permutation_kinds.data[p.kind]; + return result; +} + +function s8 +metagen_permutation_variation(MetaContext *ctx, u32 packed) +{ + MetaPermutation p = metagen_unpack_permutation(ctx, packed); + s8 result = ctx->permutations_for_kind.data[p.kind].data[p.variation]; + return result; +} + +function void +metagen_push_table(MetaprogramContext *m, Arena scratch, s8 row_start, s8 row_end, + s8 **column_strings, uz rows, uz columns) +{ + u32 *column_widths = 0; + if (columns > 1) { + column_widths = push_array(&scratch, u32, (iz)columns - 1); + for (uz column = 0; column < columns - 1; column++) { + s8 *strings = column_strings[column]; + for (uz row = 0; row < rows; row++) + column_widths[column] = MAX(column_widths[column], (u32)strings[row].len); + } + } + + for (uz row = 0; row < rows; row++) { + meta_begin_line(m, row_start); + for (uz column = 0; column < columns; column++) { + s8 text = column_strings[column][row]; + meta_push(m, text); + i32 pad = columns > 1 ? 
1 : 0; + if (column_widths && column < columns - 1) + pad += (i32)column_widths[column] - (i32)text.len; + if (column < columns - 1) meta_pad(m, ' ', pad); + } + meta_end_line(m, row_end); + } +} + +function void +metagen_push_c_struct(MetaprogramContext *m, s8 kind, s8 *types, uz types_count, s8 *fields, uz fields_count) +{ + assert(fields_count == types_count); + meta_begin_scope(m, s8("typedef struct {")); + metagen_push_table(m, m->scratch, s8(""), s8(";"), (s8 *[]){types, fields}, fields_count, 2); + meta_end_scope(m, s8("} "), kind, s8(";\n")); +} + +function void +metagen_push_counted_enum_body(MetaprogramContext *m, s8 kind, s8 prefix, s8 mid, s8 suffix, s8 *ids, iz ids_count) +{ + iz max_id_length = 0; + for (iz id = 0; id < ids_count; id++) + max_id_length = MAX(max_id_length, ids[id].len); + + for (iz id = 0; id < ids_count; id++) { + meta_begin_line(m, prefix, kind, ids[id]); + meta_pad(m, ' ', 1 + (i32)(max_id_length - ids[id].len)); + meta_push(m, mid); + meta_push_u64(m, (u64)id); + meta_end_line(m, suffix); + } +} + +function void +metagen_push_c_enum(MetaprogramContext *m, Arena scratch, s8 kind, s8 *ids, iz ids_count) +{ + s8 kind_full = push_s8_from_parts(&scratch, s8(""), kind, s8("_")); + meta_begin_scope(m, s8("typedef enum {")); + metagen_push_counted_enum_body(m, kind_full, s8(""), s8("= "), s8(","), ids, ids_count); + meta_push_line(m, kind_full, s8("Count,")); + meta_end_scope(m, s8("} "), kind, s8(";\n")); +} + +function void +metagen_push_c_flag_enum(MetaprogramContext *m, Arena scratch, s8 kind, s8 *ids, iz ids_count) +{ + s8 kind_full = push_s8_from_parts(&scratch, s8(""), kind, s8("_")); + meta_begin_scope(m, s8("typedef enum {")); + metagen_push_counted_enum_body(m, kind_full, s8(""), s8("= (1 << "), s8("),"), ids, ids_count); + meta_end_scope(m, s8("} "), kind, s8(";\n")); +} + +function void +metagen_push_shader_derivative_vectors(MetaContext *ctx, MetaprogramContext *m, MetaShader *s, + i32 sub_field_count, b32 has_local_flags) +{ 
+ meta_push_line(m, s8("// "), ctx->shader_names.data[s->base_name_id]); + for (iz perm = 0; perm < s->permutations.count; perm++) { + MetaShaderPermutation *p = s->permutations.data + perm; + if (!has_local_flags && sub_field_count == 0) { + meta_push_line(m, s8("0,")); + } else { + meta_begin_line(m, s8("(i32 []){")); + for (u8 id = 0; id < p->global_flags_count; id++) { + s8 kind = metagen_permutation_kind(ctx, p->global_flags[id]); + s8 variation = metagen_permutation_variation(ctx, p->global_flags[id]); + if (id != 0) meta_push(m, s8(", ")); + meta_push(m, s8("Beamformer"), kind, s8("_"), variation); + } + + for (i32 id = p->global_flags_count; id < sub_field_count; id++) + meta_push(m, s8(", -1")); + + // NOTE(rnp): local flag names + if (has_local_flags) { + u64 local_flags = 0; + for (u8 id = 0; id < p->local_flags_count; id++) + local_flags |= p->local_flags[id]; + + meta_push(m, s8(", 0x")); + meta_push_u64_hex(m, local_flags); + } + meta_end_line(m, s8("},")); + } + } +} + +function void +meta_push_shader_descriptors_table(MetaprogramContext *m, MetaContext *ctx) +{ + Arena scratch_start = m->scratch; + s8 *columns[4]; + for EachElement(columns, it) + columns[it] = push_array(&m->scratch, s8, ctx->shaders.count); + + Stream sb = arena_stream(m->scratch); + for (iz shader = 0; shader < ctx->shaders.count; shader++) { + MetaShaderDescriptor *sd = ctx->shader_descriptors + shader; + + stream_append_u64(&sb, (u64)sd->first_match_vector_index); + stream_append_byte(&sb, ','); + columns[0][shader] = arena_stream_commit_and_reset(&m->scratch, &sb); + + stream_append_u64(&sb, (u64)sd->one_past_last_match_vector_index); + stream_append_byte(&sb, ','); + columns[1][shader] = arena_stream_commit_and_reset(&m->scratch, &sb); + + stream_append_u64(&sb, (u64)sd->sub_field_count); + stream_append_byte(&sb, ','); + columns[2][shader] = arena_stream_commit_and_reset(&m->scratch, &sb); + + columns[3][shader] = sd->has_local_flags ? 
s8("1") : s8 ("0"); + } + + meta_begin_scope(m, s8("read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {")); + metagen_push_table(m, m->scratch, s8("{"), s8("},"), columns, (u32)ctx->shaders.count, countof(columns)); + meta_end_scope(m, s8("};\n")); + + m->scratch = scratch_start; +} + +function void +meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx) +{ + /////////////////////////////// + // NOTE(rnp): reloadable infos + i32 max_shader_name_length = 0; + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + if (ctx->base_shaders.data[shader].file.len == 0) continue; + s8 name = ctx->shader_names.data[ctx->base_shaders.data[shader].shader->base_name_id]; + max_shader_name_length = MAX((i32)name.len, max_shader_name_length); + } + + meta_begin_scope(m, s8("read_only global BeamformerReloadableShaderInfo beamformer_reloadable_shader_infos[] = {")); + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + MetaBaseShader *bs = ctx->base_shaders.data + shader; + MetaShader *s = bs->shader; + + if (bs->file.len == 0) continue; + + s8 name = ctx->shader_names.data[s->base_name_id]; + meta_begin_line(m, s8("{BeamformerShaderKind_"), name, s8(", ")); + meta_pad(m, ' ', max_shader_name_length - (i32)name.len); + meta_push_u64(m, (u64)bs->sub_shaders.count); + + if (bs->sub_shaders.count) { + meta_push(m, s8(", (i32 []){")); + for (iz sub_shader = 0; sub_shader < bs->sub_shaders.count; sub_shader++) { + if (sub_shader != 0) meta_push(m, s8(", ")); + meta_push_u64(m, bs->sub_shaders.data[sub_shader]); + } + meta_push(m, s8("}")); + } else { + meta_push(m, s8(", 0")); + } + meta_end_line(m, s8("},")); + } + meta_end_scope(m, s8("};\n")); + + meta_begin_scope(m, s8("read_only global s8 beamformer_reloadable_shader_files[] = {")); + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + MetaBaseShader *bs = ctx->base_shaders.data + shader; + if (bs->file.len == 0) continue; + meta_push_line(m, 
s8("s8_comp(\""), bs->file, s8("\"),")); + } + meta_end_scope(m, s8("};\n")); + + { + u32 info_index = 0; + for (iz group = 0; group < ctx->shader_groups.count; group++) { + MetaShaderGroup *sg = ctx->shader_groups.data + group; + meta_begin_line(m, s8("read_only global i32 beamformer_reloadable_")); + for (iz i = 0; i < sg->name.len; i++) + stream_append_byte(&m->stream, TOLOWER(sg->name.data[i])); + meta_begin_scope(m, s8("_shader_info_indices[] = {")); + + for (iz shader = 0; shader < sg->shaders.count; shader++) { + MetaShader *s = ctx->shaders.data + sg->shaders.data[shader]; + /* TODO(rnp): store base shader list in a better format */ + for (iz base_shader = 0; base_shader < ctx->base_shaders.count; base_shader++) { + MetaBaseShader *bs = ctx->base_shaders.data + base_shader; + if (bs->file.len && bs->shader == s) { + meta_indent(m); + meta_push_u64(m, info_index++); + meta_end_line(m, s8(",")); + break; + } + } + } + meta_end_scope(m, s8("};\n")); + } + } + + //////////////////////////////////// + // NOTE(rnp): shader header strings + meta_begin_scope(m, s8("read_only global s8 beamformer_shader_global_header_strings[] = {")); + for (iz kind = 0; kind < ctx->permutation_kinds.count; kind++) { + s8_list *sub_list = ctx->permutations_for_kind.data + kind; + s8 kind_name = push_s8_from_parts(&m->scratch, s8(""), ctx->permutation_kinds.data[kind], s8("_")); + meta_push_line(m, s8("s8_comp(\"\"")); + metagen_push_counted_enum_body(m, kind_name, s8("\"#define "), s8(""), s8("\\n\""), + sub_list->data, sub_list->count); + meta_push_line(m, s8("\"\\n\"),")); + m->scratch = ctx->scratch; + } + meta_end_scope(m, s8("};\n")); + + meta_begin_scope(m, s8("read_only global s8 beamformer_shader_local_header_strings[] = {")); + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + if (ctx->base_shaders.data[shader].file.len == 0) continue; + + MetaShader *s = ctx->base_shaders.data[shader].shader; + s8_list *flag_list = ctx->flags_for_shader.data + 
s->flag_list_id; + + if (flag_list->count) { + meta_push_line(m, s8("s8_comp(\"\"")); + metagen_push_counted_enum_body(m, s8("ShaderFlags_"), s8("\"#define "), s8("(1 << "), s8(")\\n\""), + flag_list->data, flag_list->count); + meta_push_line(m, s8("\"\\n\"),")); + } else { + meta_push_line(m, s8("{0},")); + } + } + meta_end_scope(m, s8("};\n")); + + meta_begin_scope(m, s8("read_only global s8 beamformer_shader_descriptor_header_strings[] = {")); + for (iz kind = 0; kind < ctx->permutation_kinds.count; kind++) + meta_push_line(m, s8("s8_comp(\""), ctx->permutation_kinds.data[kind], s8("\"),")); + meta_end_scope(m, s8("};\n")); +} + +function void +meta_push_shader_match_helper(MetaprogramContext *m, MetaContext *ctx, MetaShader *s, MetaShaderDescriptor *sd) +{ + s8 name = ctx->shader_names.data[s->base_name_id]; + meta_push_line(m, s8("function iz")); + meta_begin_line(m, s8("beamformer_shader_")); + for (iz i = 0; i < name.len; i++) + stream_append_byte(&m->stream, TOLOWER(name.data[i])); + meta_push(m, s8("_match(")); + + assert(s->global_flag_ids.count < 27); + for (iz flag = 0; flag < s->global_flag_ids.count; flag++) { + if (flag != 0) meta_push(m, s8(", ")); + u32 index = s->global_flag_ids.data[flag]; + meta_push(m, s8("Beamformer"), ctx->permutation_kinds.data[index], s8(" ")); + stream_append_byte(&m->stream, (u8)((iz)'a' + flag)); + } + if (sd->has_local_flags) { + if (s->global_flag_ids.count) meta_push(m, s8(", ")); + meta_push(m, s8("i32 flags")); + } + meta_end_line(m, s8(")")); + + meta_begin_scope(m, s8("{")); + meta_begin_line(m, s8("iz result = beamformer_shader_match((i32 []){(i32)")); + for (iz flag = 0; flag < s->global_flag_ids.count; flag++) { + if (flag != 0) meta_push(m, s8(", (i32)")); + stream_append_byte(&m->stream, (u8)((iz)'a' + flag)); + } + if (sd->has_local_flags) { + if (s->global_flag_ids.count) meta_push(m, s8(", ")); + meta_push(m, s8("flags")); + } + meta_push(m, s8("}, ")); + meta_push_u64(m, 
(u64)sd->first_match_vector_index); + meta_push(m, s8(", ")); + meta_push_u64(m, (u64)sd->one_past_last_match_vector_index); + meta_push(m, s8(", ")); + meta_push_u64(m, (u64)sd->sub_field_count + sd->has_local_flags); + meta_end_line(m, s8(");")); + meta_push_line(m, s8("return result;")); + meta_end_scope(m, s8("}\n")); +} + +function b32 +metagen_emit_c_code(MetaContext *ctx, Arena arena) +{ + b32 result = 1; + + os_make_directory("generated"); + char *out = "generated/beamformer.meta.c"; + if (!needs_rebuild(out, "beamformer.meta")) + return result; + + build_log_generate("Core C Code"); - os_make_directory(OUTPUT("matlab/+OGLBeamformerFilter")); - #define X(kind, ...) {OUTPUT("matlab/+OGLBeamformerFilter/" #kind ".m"), s8_comp(#kind), s8_comp(#__VA_ARGS__)}, - read_only local_persist struct {char *out; s8 class, args;} filter_table[] = { - BEAMFORMER_FILTER_KIND_LIST(,) + MetaprogramContext meta_program = {.stream = arena_stream(arena), .scratch = ctx->scratch}; + MetaprogramContext *m = &meta_program; + + meta_push_line(m, s8("/* See LICENSE for license details. 
*/\n")); + meta_push_line(m, s8("// GENERATED CODE\n")); + + ///////////////////////// + // NOTE(rnp): enumarents + for (iz kind = 0; kind < ctx->permutation_kinds.count; kind++) { + s8 enum_name = push_s8_from_parts(&m->scratch, s8(""), s8("Beamformer"), ctx->permutation_kinds.data[kind]); + metagen_push_c_enum(m, m->scratch, enum_name, ctx->permutations_for_kind.data[kind].data, + ctx->permutations_for_kind.data[kind].count); + m->scratch = ctx->scratch; + } + + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + MetaShader *s = ctx->base_shaders.data[shader].shader; + s8_list flag_list = ctx->flags_for_shader.data[s->flag_list_id]; + if (flag_list.count) { + s8 enum_name = push_s8_from_parts(&m->scratch, s8(""), s8("BeamformerShader"), + ctx->shader_names.data[s->base_name_id], s8("Flags")); + metagen_push_c_flag_enum(m, m->scratch, enum_name, flag_list.data, flag_list.count); + m->scratch = ctx->scratch; + } + } + + { + s8 kind = s8("BeamformerShaderKind"); + s8 kind_full = s8("BeamformerShaderKind_"); + meta_begin_scope(m, s8("typedef enum {")); + metagen_push_counted_enum_body(m, kind_full, s8(""), s8("= "), s8(","), + ctx->shader_names.data, ctx->shader_names.count); + meta_push_line(m, kind_full, s8("Count,\n")); + + s8 *columns[2]; + columns[0] = push_array(&m->scratch, s8, ctx->shader_groups.count * 3); + columns[1] = push_array(&m->scratch, s8, ctx->shader_groups.count * 3); + + for (iz group = 0; group < ctx->shader_groups.count; group++) { + MetaShaderGroup *sg = ctx->shader_groups.data + group; + + s8 first_name = ctx->shader_names.data[ctx->shaders.data[sg->shaders.data[0]].base_name_id]; + s8 last_name = ctx->shader_names.data[ctx->shaders.data[sg->shaders.data[sg->shaders.count - 1]].base_name_id]; + + columns[0][3 * group + 0] = push_s8_from_parts(&m->scratch, s8(""), kind, s8("_"), sg->name, s8("First")); + columns[1][3 * group + 0] = push_s8_from_parts(&m->scratch, s8(""), s8("= "), kind, s8("_"), first_name); + + columns[0][3 * 
group + 1] = push_s8_from_parts(&m->scratch, s8(""), kind, s8("_"), sg->name, s8("Last")); + columns[1][3 * group + 1] = push_s8_from_parts(&m->scratch, s8(""),s8("= "), kind, s8("_"), last_name); + + columns[0][3 * group + 2] = push_s8_from_parts(&m->scratch, s8(""), kind, s8("_"), sg->name, s8("Count")); + Stream sb = arena_stream(m->scratch); + stream_append_s8(&sb, s8("= ")); + stream_append_u64(&sb, (u64)sg->shaders.count); + columns[1][3 * group + 2] = arena_stream_commit(&m->scratch, &sb); + } + metagen_push_table(m, m->scratch, s8(""), s8(","), columns, (uz)ctx->shader_groups.count * 3, 2); + + meta_end_scope(m, s8("} "), kind, s8(";\n")); + m->scratch = ctx->scratch; + } + + ////////////////////// + // NOTE(rnp): structs + { + s8 name = s8_comp("BeamformerShaderDescriptor"); + s8 types[] = {s8_comp("i32"), s8_comp("i32"), s8_comp("i32"), s8_comp("b32")}; + s8 names[] = { + s8_comp("first_match_vector_index"), + s8_comp("one_past_last_match_vector_index"), + s8_comp("match_vector_length"), + s8_comp("has_local_flags"), }; - #undef X + metagen_push_c_struct(m, name, types, countof(types), names, countof(names)); + } - s8_list members = {0}; - for EachElement(filter_table, filter) { - typeof(*filter_table) *f = filter_table + filter; - members.count = 0; - s8_list_from_s8(&members, &scratch, f->args); - meta_begin_scope(&m, s8("classdef "), f->class, s8(" < OGLBeamformerFilter.BaseFilter")); - - meta_begin_scope(&m, s8("properties")); - for (iz it = 0; it < members.count; it++) - meta_push_matlab_property(&m, members.data[it], 1); - meta_end_scope(&m, s8("end")); - - meta_begin_scope(&m, s8("methods")); - meta_begin_line(&m, s8("function obj = "), f->class, s8("(")); - for (iz it = 0; it < members.count; it++) - meta_push(&m, it > 0 ? 
s8(", ") : s8(""), members.data[it]); - meta_end_line(&m, s8(")")); - - m.indentation_level++; - for (iz it = 0; it < members.count; it++) - meta_push_line(&m, s8("obj."), members.data[it], s8(" = "), members.data[it], s8(";")); - result &= meta_end_and_write_matlab(&m, f->out); + { + s8 name = s8_comp("BeamformerReloadableShaderInfo"); + s8 types[] = {s8_comp("BeamformerShaderKind"), s8_comp("i32"), s8_comp("i32 *")}; + s8 names[] = { + s8_comp("kind"), + s8_comp("sub_shader_descriptor_index_count"), + s8_comp("sub_shader_descriptor_indices"), + }; + metagen_push_c_struct(m, name, types, countof(types), names, countof(names)); + } + + /////////////////////////////////////// + // NOTE(rnp): shader descriptor tables + i32 match_vectors_count = 0; + meta_begin_scope(m, s8("read_only global i32 *beamformer_shader_match_vectors[] = {")); + for (iz shader = 0; shader < ctx->shaders.count; shader++) { + MetaShader *s = ctx->shaders.data + shader; + MetaShaderDescriptor *sd = ctx->shader_descriptors + shader; + metagen_push_shader_derivative_vectors(ctx, m, s, sd->sub_field_count, sd->has_local_flags); + match_vectors_count += (i32)s->permutations.count; + } + meta_end_scope(m, s8("};")); + meta_begin_line(m, s8("#define beamformer_match_vectors_count (")); + meta_push_u64(m, (u64)match_vectors_count); + meta_end_line(m, s8(")\n")); + + meta_push_shader_descriptors_table(m, ctx); + + ///////////////////////////////// + // NOTE(rnp): shader info tables + meta_begin_scope(m, s8("read_only global s8 beamformer_shader_names[] = {")); + metagen_push_table(m, m->scratch, s8("s8_comp(\""), s8("\"),"), &ctx->shader_names.data, + (uz)ctx->shader_names.count, 1); + meta_end_scope(m, s8("};\n")); + + meta_push_shader_reload_info(m, ctx); + + meta_begin_scope(m, s8("read_only global i32 *beamformer_shader_header_vectors[] = {")); + for (iz shader = 0; shader < ctx->shaders.count; shader++) { + MetaShader *s = ctx->shaders.data + shader; + + if (s->global_flag_ids.count) { + 
meta_begin_line(m, s8("(i32 []){")); + for (iz id = 0; id < s->global_flag_ids.count; id++) { + if (id != 0) meta_push(m, s8(", ")); + meta_push_u64(m, s->global_flag_ids.data[id]); + } + meta_end_line(m, s8("},")); + } else { + meta_push_line(m, s8("0,")); } + } + meta_end_scope(m, s8("};\n")); + + ////////////////////////////////////// + // NOTE(rnp): shader matching helpers + meta_push_line(m, s8("function iz")); + meta_push_line(m, s8("beamformer_shader_match(i32 *match_vector, i32 first_index, i32 one_past_last_index, i32 vector_length)")); + meta_begin_scope(m, s8("{")); + meta_push_line(m, s8("iz result = first_index;")); + meta_push_line(m, s8("i32 best_score = 0;")); + meta_push_line(m, s8("for (i32 index = first_index; index < one_past_last_index; index++)")); + meta_begin_scope(m, s8("{")); + meta_push_line(m, s8("i32 score = 0;")); + meta_push_line(m, s8("i32 *v = beamformer_shader_match_vectors[index];")); + meta_begin_scope(m, s8("for (i32 i = 0; i < vector_length; i++) {")); + meta_begin_scope(m, s8("if (match_vector[i] == v[i]) {")); + meta_push_line(m, s8("score++;")); + meta_end_scope(m, s8("}")); + meta_end_scope(m, s8("}")); + meta_begin_scope(m, s8("if (best_score < score) {")); + meta_push_line(m, s8("result = index;")); + meta_push_line(m, s8("best_score = score;")); + meta_end_scope(m, s8("}")); + meta_end_scope(m, s8("}")); + meta_push_line(m, s8("return result;")); + meta_end_scope(m, s8("}\n")); + + for (iz shader = 0; shader < ctx->shaders.count; shader++) { + MetaShader *s = ctx->shaders.data + shader; + MetaShaderDescriptor *sd = ctx->shader_descriptors + shader; + if (sd->sub_field_count || sd->has_local_flags) + meta_push_shader_match_helper(m, ctx, s, sd); + } - meta_begin_matlab_class(&m, "BaseFilter"); - meta_begin_scope(&m, s8("methods")); - meta_begin_scope(&m, s8("function out = Flatten(obj)")); - meta_push_line(&m, s8("fields = struct2cell(struct(obj));")); - meta_push_line(&m, s8("out = zeros(1, numel(fields));")); - 
meta_begin_scope(&m, s8("for i = 1:numel(fields)")); - meta_push_line(&m, s8("out(i) = fields{i};")); - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/+OGLBeamformerFilter/BaseFilter.m")); - - #define X(name, __t, __s, elements, ...) meta_push_line(&m, s8(#name "(1," #elements ")")); - meta_begin_matlab_class(&m, "OGLBeamformerParameters"); - meta_begin_scope(&m, s8("properties")); - BEAMFORMER_PARAMS_HEAD - BEAMFORMER_UI_PARAMS - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerParameters.m")); - - meta_begin_matlab_class(&m, "OGLBeamformerParametersHead"); - meta_begin_scope(&m, s8("properties")); - BEAMFORMER_PARAMS_HEAD - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerParametersHead.m")); - - meta_begin_matlab_class(&m, "OGLBeamformerParametersUI"); - meta_begin_scope(&m, s8("properties")); - BEAMFORMER_UI_PARAMS - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerParametersUI.m")); - #undef X + //fprintf(stderr, "%.*s\n", (i32)m.stream.widx, m.stream.data); - #define X(name, __t, __s, elements, ...) meta_push_matlab_property(&m, s8(#name), elements); - meta_begin_matlab_class(&m, "OGLBeamformerLiveImagingParameters"); - meta_begin_scope(&m, s8("properties")); - BEAMFORMER_LIVE_IMAGING_PARAMETERS_LIST - result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerLiveImagingParameters.m")); - #undef X + result = meta_write_and_reset(m, out); + + return result; +} + +function b32 +metagen_emit_matlab_code(MetaContext *ctx, Arena arena) +{ + b32 result = 1; + if (!needs_rebuild(OUTPUT("matlab/OGLBeamformerFilterKind.m"), "beamformer_parameters.h")) + return result; + + build_log_generate("MATLAB Bindings"); + /* TODO(rnp): recreate/clear directory incase these file names change */ + os_make_directory(OUTPUT("matlab")); + + MetaprogramContext meta_program = {.stream = arena_stream(arena), .scratch = ctx->scratch}; + MetaprogramContext *m = &meta_program; + + #define X(name, flag, ...) 
meta_push_line(m, s8(#name " (" str(flag) ")")); + meta_begin_matlab_class(m, "OGLBeamformerLiveFeedbackFlags", "int32"); + meta_begin_scope(m, s8("enumeration")); + BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerLiveFeedbackFlags.m")); + #undef X + + #define X(kind, ...) meta_push_matlab_enum_with_value(m, s8(#kind), BeamformerFilterKind_## kind); + meta_begin_matlab_class(m, "OGLBeamformerFilterKind", "int32"); + meta_begin_scope(m, s8("enumeration")); + BEAMFORMER_FILTER_KIND_LIST(,) + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerFilterKind.m")); + #undef X + + #define X(kind, ...) meta_push_matlab_enum_with_value(m, s8(#kind), BeamformerTransmitMode_## kind); + meta_begin_matlab_class(m, "OGLBeamformerTransmitModes", "int32"); + meta_begin_scope(m, s8("enumeration")); + TRANSMIT_MODES_LIST + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerTransmitModes.m")); + #undef X + + #define X(kind, ...) meta_push_matlab_enum_with_value(m, s8(#kind), BeamformerReceiveMode_## kind); + meta_begin_matlab_class(m, "OGLBeamformerReceiveModes", "int32"); + meta_begin_scope(m, s8("enumeration")); + RECEIVE_MODES_LIST + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerReceiveModes.m")); + #undef X + + os_make_directory(OUTPUT("matlab/+OGLBeamformerFilter")); + #define X(kind, ...) 
{OUTPUT("matlab/+OGLBeamformerFilter/" #kind ".m"), s8_comp(#kind), s8_comp(#__VA_ARGS__)}, + read_only local_persist struct {char *out; s8 class, args;} filter_table[] = { + BEAMFORMER_FILTER_KIND_LIST(,) + }; + #undef X + + s8_list members = {0}; + for EachElement(filter_table, filter) { + typeof(*filter_table) *f = filter_table + filter; + members.count = 0; + s8_list_from_s8(&members, &m->scratch, f->args); + meta_begin_scope(m, s8("classdef "), f->class, s8(" < OGLBeamformerFilter.BaseFilter")); + + meta_begin_scope(m, s8("properties")); + for (iz it = 0; it < members.count; it++) + meta_push_matlab_property(m, members.data[it], 1); + meta_end_scope(m, s8("end")); + + meta_begin_scope(m, s8("methods")); + meta_begin_line(m, s8("function obj = "), f->class, s8("(")); + for (iz it = 0; it < members.count; it++) + meta_push(m, it > 0 ? s8(", ") : s8(""), members.data[it]); + meta_end_line(m, s8(")")); + + m->indentation_level++; + for (iz it = 0; it < members.count; it++) + meta_push_line(m, s8("obj."), members.data[it], s8(" = "), members.data[it], s8(";")); + result &= meta_end_and_write_matlab(m, f->out); + } + m->scratch = ctx->scratch; + + meta_begin_matlab_class(m, "BaseFilter"); + meta_begin_scope(m, s8("methods")); + meta_begin_scope(m, s8("function out = Flatten(obj)")); + meta_push_line(m, s8("fields = struct2cell(struct(obj));")); + meta_push_line(m, s8("out = zeros(1, numel(fields));")); + meta_begin_scope(m, s8("for i = 1:numel(fields)")); + meta_push_line(m, s8("out(i) = fields{i};")); + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/+OGLBeamformerFilter/BaseFilter.m")); + + #define X(name, __t, __s, elements, ...) 
meta_push_line(m, s8(#name "(1," #elements ")")); + meta_begin_matlab_class(m, "OGLBeamformerParameters"); + meta_begin_scope(m, s8("properties")); + BEAMFORMER_PARAMS_HEAD + BEAMFORMER_UI_PARAMS + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerParameters.m")); + + meta_begin_matlab_class(m, "OGLBeamformerParametersHead"); + meta_begin_scope(m, s8("properties")); + BEAMFORMER_PARAMS_HEAD + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerParametersHead.m")); + + meta_begin_matlab_class(m, "OGLBeamformerParametersUI"); + meta_begin_scope(m, s8("properties")); + BEAMFORMER_UI_PARAMS + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerParametersUI.m")); + #undef X + + #define X(name, __t, __s, elements, ...) meta_push_matlab_property(m, s8(#name), elements); + meta_begin_matlab_class(m, "OGLBeamformerLiveImagingParameters"); + meta_begin_scope(m, s8("properties")); + BEAMFORMER_LIVE_IMAGING_PARAMETERS_LIST + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerLiveImagingParameters.m")); + #undef X + + meta_begin_matlab_class(m, "OGLBeamformerDataKind", "int32"); + meta_begin_scope(m, s8("enumeration")); + { + iz index = meta_lookup_string_slow(&ctx->permutation_kinds, s8("DataKind")); + if (index != -1) { + s8_list *kinds = ctx->permutations_for_kind.data + index; + metagen_push_counted_enum_body(m, s8(""), s8(""), s8("("), s8(")"), kinds->data, kinds->count); + } else { + build_log_failure("failed to find DataKind enum in meta info\n"); + } + result &= index != -1; } + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerDataKind.m")); + m->scratch = ctx->scratch; + + meta_begin_matlab_class(m, "OGLBeamformerShaderStage", "int32"); + meta_begin_scope(m, s8("enumeration")); + { + iz index = -1; + for (iz group = 0; group < ctx->shader_groups.count; group++) { + if (s8_equal(ctx->shader_groups.data[group].name, s8("Compute"))) { + index = group; + break; + } + } + if (index != -1) { + 
MetaShaderGroup *sg = ctx->shader_groups.data + index; + /* TODO(rnp): this assumes that the shaders are sequential */ + s8 *names = ctx->shader_names.data + ctx->shaders.data[0].base_name_id; + metagen_push_counted_enum_body(m, s8(""), s8(""), s8("("), s8(")"), names, sg->shaders.count); + } else { + build_log_failure("failed to find Compute shader group in meta info\n"); + } + result &= index != -1; + } + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerShaderStage.m")); + + meta_begin_matlab_class(m, "OGLBeamformerSamplingModes", "int32"); + meta_begin_scope(m, s8("enumeration")); + { + iz index = meta_lookup_string_slow(&ctx->permutation_kinds, s8("SamplingMode")); + if (index != -1) { + s8_list *kinds = ctx->permutations_for_kind.data + index; + metagen_push_counted_enum_body(m, s8(""), s8("m"), s8("("), s8(")"), kinds->data, kinds->count); + } else { + build_log_failure("failed to find SamplingModes enum in meta info\n"); + } + result &= index != -1; + } + result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerSamplingModes.m")); + + return result; +} + +function b32 +metagen_emit_helper_library_header(MetaContext *ctx, Arena arena) +{ + b32 result = 1; + char *out = OUTPUT("ogl_beamformer_lib.h"); + if (!needs_rebuild(out, "helpers/ogl_beamformer_lib_base.h", "beamformer.meta")) + return result; + + build_log_generate("Helper Library Header"); + + s8 parameters_header = os_read_whole_file(&arena, "beamformer_parameters.h"); + s8 base_header = os_read_whole_file(&arena, "helpers/ogl_beamformer_lib_base.h"); + + MetaprogramContext meta_program = {.stream = arena_stream(arena), .scratch = ctx->scratch}; + MetaprogramContext *m = &meta_program; + + meta_push_line(m, s8("/* See LICENSE for license details. 
*/\n")); + meta_push_line(m, s8("// GENERATED CODE\n")); + + { + iz index = meta_lookup_string_slow(&ctx->permutation_kinds, s8("DataKind")); + if (index != -1) { + s8 enum_name = push_s8_from_parts(&m->scratch, s8(""), s8("Beamformer"), ctx->permutation_kinds.data[index]); + metagen_push_c_enum(m, m->scratch, enum_name, ctx->permutations_for_kind.data[index].data, + ctx->permutations_for_kind.data[index].count); + m->scratch = ctx->scratch; + } else { + build_log_failure("failed to find DataKind in meta info\n"); + } + } + + { + iz index = -1; + for (iz group = 0; group < ctx->shader_groups.count; group++) { + if (s8_equal(ctx->shader_groups.data[group].name, s8("Compute"))) { + index = group; + break; + } + } + if (index != -1) { + MetaShaderGroup *sg = ctx->shader_groups.data + index; + meta_begin_line(m, s8("#define BeamformerShaderKind_ComputeCount (")); + meta_push_u64(m, (u64)sg->shaders.count); + meta_end_line(m, s8(")\n")); + } else { + build_log_failure("failed to find Compute shader group in meta info\n"); + } + } + + meta_push(m, parameters_header, base_header); + result &= meta_write_and_reset(m, out); return result; } +function MetaContext * +metagen_load_context(Arena *arena) +{ + if (setjmp(compiler_jmp_buf)) { + /* NOTE(rnp): compiler error */ + return 0; + } + + MetaContext *ctx = push_struct(arena, MetaContext); + ctx->scratch = sub_arena(arena, MB(1), 16); + ctx->arena = arena; + + MetaContext *result = ctx; + + Arena scratch = ctx->scratch; + MetaEntryStack entries = meta_entry_stack_from_file(ctx->arena, scratch, "beamformer.meta"); + + i32 stack_items[32]; + struct { i32 *data; iz capacity; iz count; } stack = {stack_items, countof(stack_items), 0}; + + MetaShaderGroup *current_shader_group = 0; + for (iz i = 0; i < entries.count; i++) { + MetaEntry *e = entries.data + i; + //if (e->kind == MetaEntryKind_EndScope) depth--; + //meta_entry_print(e, depth, -1); + //if (e->kind == MetaEntryKind_BeginScope) depth++; + //continue; + + switch 
(e->kind) { + case MetaEntryKind_BeginScope:{ *da_push(&scratch, &stack) = (i32)(i - 1); }break; + case MetaEntryKind_EndScope:{ + i32 index = stack.data[--stack.count]; + MetaEntry *ended = entries.data + index; + switch (ended->kind) { + case MetaEntryKind_ShaderGroup:{ current_shader_group = 0; }break; + default:{}break; + } + }break; + case MetaEntryKind_ShaderGroup:{ + MetaShaderGroup *sg = da_push(ctx->arena, &ctx->shader_groups); + sg->name = e->name; + current_shader_group = sg; + }break; + case MetaEntryKind_Shader:{ + if (!current_shader_group) goto error; + i += meta_pack_shader(ctx, current_shader_group, scratch, e, entries.count - i); + }break; + + error: + default: + { + meta_entry_error(e, "invalid @%s() in global scope\n", meta_entry_kind_strings[e->kind]); + }break; + } + } + + ctx->shader_descriptors = push_array(ctx->arena, MetaShaderDescriptor, ctx->shaders.count); + { + i32 match_vectors_count = 0; + for (iz shader = 0; shader < ctx->shaders.count; shader++) { + MetaShader *s = ctx->shaders.data + shader; + MetaShaderDescriptor *sd = ctx->shader_descriptors + shader; + + for (iz perm = 0; perm < s->permutations.count; perm++) + sd->has_local_flags |= s->permutations.data[perm].local_flags_count != 0; + sd->sub_field_count = (i32)s->global_flag_ids.count; + sd->first_match_vector_index = match_vectors_count; + match_vectors_count += (i32)s->permutations.count; + sd->one_past_last_match_vector_index = match_vectors_count; + } + } + + result->arena = 0; + return result; +} + i32 main(i32 argc, char *argv[]) { @@ -967,10 +2434,17 @@ main(i32 argc, char *argv[]) Arena arena = os_alloc_arena(MB(8)); check_rebuild_self(arena, argc, argv); - Options options = parse_options(argc, argv); - os_make_directory(OUTDIR); + MetaContext *meta = metagen_load_context(&arena); + if (!meta) return 1; + + result &= metagen_emit_c_code(meta, arena); + result &= metagen_emit_helper_library_header(meta, arena); + result &= metagen_emit_matlab_code(meta, arena); + + 
Options options = parse_options(argc, argv); + CommandList c = cmd_base(&arena, &options); if (!check_build_raylib(arena, c, options.debug)) return 1; @@ -980,7 +2454,6 @@ main(i32 argc, char *argv[]) ///////////////// // helpers/tests - result &= build_matlab_bindings(arena); result &= build_helper_library(arena, c); if (options.tests) result &= build_tests(arena, c); diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -0,0 +1,278 @@ +/* See LICENSE for license details. */ + +// GENERATED CODE + +typedef enum { + BeamformerDataKind_Int16 = 0, + BeamformerDataKind_Int16Complex = 1, + BeamformerDataKind_Float32 = 2, + BeamformerDataKind_Float32Complex = 3, + BeamformerDataKind_Count, +} BeamformerDataKind; + +typedef enum { + BeamformerSamplingMode_2X = 0, + BeamformerSamplingMode_4X = 1, + BeamformerSamplingMode_Count, +} BeamformerSamplingMode; + +typedef enum { + BeamformerShaderFilterFlags_MapChannels = (1 << 0), + BeamformerShaderFilterFlags_ComplexFilter = (1 << 1), + BeamformerShaderFilterFlags_Demodulate = (1 << 2), +} BeamformerShaderFilterFlags; + +typedef enum { + BeamformerShaderDASFlags_Fast = (1 << 0), + BeamformerShaderDASFlags_Sparse = (1 << 1), +} BeamformerShaderDASFlags; + +typedef enum { + BeamformerShaderKind_CudaDecode = 0, + BeamformerShaderKind_CudaHilbert = 1, + BeamformerShaderKind_Decode = 2, + BeamformerShaderKind_Filter = 3, + BeamformerShaderKind_Demodulate = 4, + BeamformerShaderKind_DAS = 5, + BeamformerShaderKind_MinMax = 6, + BeamformerShaderKind_Sum = 7, + BeamformerShaderKind_Render3D = 8, + BeamformerShaderKind_Count, + + BeamformerShaderKind_ComputeFirst = BeamformerShaderKind_CudaDecode, + BeamformerShaderKind_ComputeLast = BeamformerShaderKind_Sum, + BeamformerShaderKind_ComputeCount = 8, + BeamformerShaderKind_RenderFirst = BeamformerShaderKind_Render3D, + BeamformerShaderKind_RenderLast = BeamformerShaderKind_Render3D, + BeamformerShaderKind_RenderCount = 1, +} BeamformerShaderKind; + +typedef struct { 
+ i32 first_match_vector_index; + i32 one_past_last_match_vector_index; + i32 match_vector_length; + b32 has_local_flags; +} BeamformerShaderDescriptor; + +typedef struct { + BeamformerShaderKind kind; + i32 sub_shader_descriptor_index_count; + i32 * sub_shader_descriptor_indices; +} BeamformerReloadableShaderInfo; + +read_only global i32 *beamformer_shader_match_vectors[] = { + // CudaDecode + 0, + // CudaHilbert + 0, + // Decode + (i32 []){BeamformerDataKind_Int16}, + (i32 []){BeamformerDataKind_Int16Complex}, + (i32 []){BeamformerDataKind_Float32}, + (i32 []){BeamformerDataKind_Float32Complex}, + // Filter + (i32 []){BeamformerDataKind_Int16Complex, 0x00}, + (i32 []){BeamformerDataKind_Int16Complex, 0x01}, + (i32 []){BeamformerDataKind_Int16Complex, 0x02}, + (i32 []){BeamformerDataKind_Int16Complex, 0x03}, + (i32 []){BeamformerDataKind_Float32, 0x00}, + (i32 []){BeamformerDataKind_Float32, 0x01}, + (i32 []){BeamformerDataKind_Float32, 0x02}, + (i32 []){BeamformerDataKind_Float32, 0x03}, + (i32 []){BeamformerDataKind_Float32Complex, 0x00}, + (i32 []){BeamformerDataKind_Float32Complex, 0x01}, + (i32 []){BeamformerDataKind_Float32Complex, 0x02}, + (i32 []){BeamformerDataKind_Float32Complex, 0x03}, + // Demodulate + (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x04}, + (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x05}, + (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x06}, + (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x07}, + (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x04}, + (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x05}, + (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x06}, + (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x07}, + (i32 []){BeamformerDataKind_Int16, -1, 0x04}, + (i32 []){BeamformerDataKind_Int16, -1, 0x05}, + (i32 []){BeamformerDataKind_Int16, -1, 0x06}, + (i32 []){BeamformerDataKind_Int16, -1, 0x07}, + (i32 
[]){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x04}, + (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x05}, + (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x06}, + (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x07}, + (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x04}, + (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x05}, + (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x06}, + (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x07}, + (i32 []){BeamformerDataKind_Float32, -1, 0x04}, + (i32 []){BeamformerDataKind_Float32, -1, 0x05}, + (i32 []){BeamformerDataKind_Float32, -1, 0x06}, + (i32 []){BeamformerDataKind_Float32, -1, 0x07}, + // DAS + (i32 []){BeamformerDataKind_Float32, 0x00}, + (i32 []){BeamformerDataKind_Float32, 0x01}, + (i32 []){BeamformerDataKind_Float32, 0x02}, + (i32 []){BeamformerDataKind_Float32, 0x03}, + (i32 []){BeamformerDataKind_Float32Complex, 0x00}, + (i32 []){BeamformerDataKind_Float32Complex, 0x01}, + (i32 []){BeamformerDataKind_Float32Complex, 0x02}, + (i32 []){BeamformerDataKind_Float32Complex, 0x03}, + // MinMax + 0, + // Sum + 0, + // Render3D + 0, +}; +#define beamformer_match_vectors_count (53) + +read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = { + {0, 1, 0, 0}, + {1, 2, 0, 0}, + {2, 6, 1, 0}, + {6, 18, 1, 1}, + {18, 42, 2, 1}, + {42, 50, 1, 1}, + {50, 51, 0, 0}, + {51, 52, 0, 0}, + {52, 53, 0, 0}, +}; + +read_only global s8 beamformer_shader_names[] = { + s8_comp("CudaDecode"), + s8_comp("CudaHilbert"), + s8_comp("Decode"), + s8_comp("Filter"), + s8_comp("Demodulate"), + s8_comp("DAS"), + s8_comp("MinMax"), + s8_comp("Sum"), + s8_comp("Render3D"), +}; + +read_only global BeamformerReloadableShaderInfo beamformer_reloadable_shader_infos[] = { + {BeamformerShaderKind_Decode, 0, 0}, + {BeamformerShaderKind_Filter, 1, (i32 []){4}}, + {BeamformerShaderKind_DAS, 0, 0}, + 
{BeamformerShaderKind_MinMax, 0, 0}, + {BeamformerShaderKind_Sum, 0, 0}, + {BeamformerShaderKind_Render3D, 0, 0}, +}; + +read_only global s8 beamformer_reloadable_shader_files[] = { + s8_comp("decode.glsl"), + s8_comp("filter.glsl"), + s8_comp("das.glsl"), + s8_comp("min_max.glsl"), + s8_comp("sum.glsl"), + s8_comp("render_3d.frag.glsl"), +}; + +read_only global i32 beamformer_reloadable_compute_shader_info_indices[] = { + 0, + 1, + 2, + 3, + 4, +}; + +read_only global i32 beamformer_reloadable_render_shader_info_indices[] = { + 5, +}; + +read_only global s8 beamformer_shader_global_header_strings[] = { + s8_comp("" + "#define DataKind_Int16 0\n" + "#define DataKind_Int16Complex 1\n" + "#define DataKind_Float32 2\n" + "#define DataKind_Float32Complex 3\n" + "\n"), + s8_comp("" + "#define SamplingMode_2X 0\n" + "#define SamplingMode_4X 1\n" + "\n"), +}; + +read_only global s8 beamformer_shader_local_header_strings[] = { + {0}, + s8_comp("" + "#define ShaderFlags_MapChannels (1 << 0)\n" + "#define ShaderFlags_ComplexFilter (1 << 1)\n" + "#define ShaderFlags_Demodulate (1 << 2)\n" + "\n"), + s8_comp("" + "#define ShaderFlags_Fast (1 << 0)\n" + "#define ShaderFlags_Sparse (1 << 1)\n" + "\n"), + {0}, + {0}, + {0}, +}; + +read_only global s8 beamformer_shader_descriptor_header_strings[] = { + s8_comp("DataKind"), + s8_comp("SamplingMode"), +}; + +read_only global i32 *beamformer_shader_header_vectors[] = { + 0, + 0, + (i32 []){0}, + (i32 []){0}, + (i32 []){0, 1}, + (i32 []){0}, + 0, + 0, + 0, +}; + +function iz +beamformer_shader_match(i32 *match_vector, i32 first_index, i32 one_past_last_index, i32 vector_length) +{ + iz result = first_index; + i32 best_score = 0; + for (i32 index = first_index; index < one_past_last_index; index++) + { + i32 score = 0; + i32 *v = beamformer_shader_match_vectors[index]; + for (i32 i = 0; i < vector_length; i++) { + if (match_vector[i] == v[i]) { + score++; + } + } + if (best_score < score) { + result = index; + best_score = score; + } + 
} + return result; +} + +function iz +beamformer_shader_decode_match(BeamformerDataKind a) +{ + iz result = beamformer_shader_match((i32 []){(i32)a}, 2, 6, 1); + return result; +} + +function iz +beamformer_shader_filter_match(BeamformerDataKind a, i32 flags) +{ + iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 6, 18, 2); + return result; +} + +function iz +beamformer_shader_demodulate_match(BeamformerDataKind a, BeamformerSamplingMode b, i32 flags) +{ + iz result = beamformer_shader_match((i32 []){(i32)a, (i32)b, flags}, 18, 42, 3); + return result; +} + +function iz +beamformer_shader_das_match(BeamformerDataKind a, i32 flags) +{ + iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 42, 50, 2); + return result; +} + diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c @@ -2,6 +2,7 @@ #include "../compiler.h" #include "../util.h" +#include "../generated/beamformer.meta.c" #include "../beamformer_parameters.h" #include "ogl_beamformer_lib_base.h" @@ -225,7 +226,7 @@ validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind) BF_LIB_ERR_KIND_COMPUTE_STAGE_OVERFLOW)) { for (u32 i = 0; i < shader_count; i++) - result &= BETWEEN(shaders[i], 0, BeamformerShaderKind_ComputeCount); + result &= BETWEEN(shaders[i], BeamformerShaderKind_ComputeFirst, BeamformerShaderKind_ComputeLast); if (!result) { g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_COMPUTE_STAGE; } else if (shaders[0] != BeamformerShaderKind_Demodulate && diff --git a/intrinsics.c b/intrinsics.c @@ -112,6 +112,17 @@ ctz_u32(u32 a) return result; } +function force_inline u64 +ctz_u64(u64 a) +{ + u64 result = 64, index; + if (a) { + _BitScanForward64(&index, a); + result = index; + } + return result; +} + #else /* !COMPILER_MSVC */ function force_inline u32 @@ -130,6 +141,14 @@ ctz_u32(u32 a) return result; } +function force_inline u64 +ctz_u64(u64 a) +{ + u64 result = 64; + if (a) result = (u64)__builtin_ctzll(a); + return result; 
+} + #endif #if ARCH_ARM64 diff --git a/os_linux.c b/os_linux.c @@ -223,17 +223,17 @@ os_unload_library(void *h) function OS_ADD_FILE_WATCH_FN(os_add_file_watch) { s8 directory = path; - directory.len = s8_scan_backwards(path, '/'); - ASSERT(directory.len > 0); + directory.len = s8_scan_backwards(path, OS_PATH_SEPARATOR_CHAR); + assert(directory.len > 0); u64 hash = s8_hash(directory); FileWatchContext *fwctx = &os->file_watch_context; FileWatchDirectory *dir = lookup_file_watch_directory(fwctx, hash); if (!dir) { - ASSERT(path.data[directory.len] == '/'); + assert(path.data[directory.len] == OS_PATH_SEPARATOR_CHAR); dir = da_push(a, fwctx); dir->hash = hash; - dir->name = push_s8_zero(a, directory); + dir->name = push_s8(a, directory); u32 mask = IN_MOVED_TO|IN_CLOSE_WRITE; dir->handle = inotify_add_watch((i32)fwctx->handle, (c8 *)dir->name.data, mask); } diff --git a/os_win32.c b/os_win32.c @@ -338,18 +338,18 @@ os_unload_library(void *h) function OS_ADD_FILE_WATCH_FN(os_add_file_watch) { s8 directory = path; - directory.len = s8_scan_backwards(path, '\\'); - ASSERT(directory.len > 0); + directory.len = s8_scan_backwards(path, OS_PATH_SEPARATOR_CHAR); + assert(directory.len > 0); u64 hash = s8_hash(directory); FileWatchContext *fwctx = &os->file_watch_context; FileWatchDirectory *dir = lookup_file_watch_directory(fwctx, hash); if (!dir) { - ASSERT(path.data[directory.len] == '\\'); + assert(path.data[directory.len] == OS_PATH_SEPARATOR_CHAR); dir = da_push(a, fwctx); dir->hash = hash; - dir->name = push_s8_zero(a, directory); + dir->name = push_s8(a, directory); dir->handle = CreateFileA((c8 *)dir->name.data, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS|FILE_FLAG_OVERLAPPED, 0); diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -3,7 +3,9 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 { vec2 rf_data[]; }; -#if DAS_FAST +const bool sparse = (ShaderFlags & ShaderFlags_Sparse) != 0; + +#if (ShaderFlags & 
ShaderFlags_Fast) layout(rg32f, binding = 0) restrict uniform image3D u_out_data_tex; #else layout(rg32f, binding = 0) writeonly restrict uniform image3D u_out_data_tex; @@ -102,7 +104,7 @@ float cylindrical_wave_transmit_distance(vec3 point, float focal_depth, float tr return distance(rca_plane_projection(point, tx_rows), f); } -#if DAS_FAST +#if (ShaderFlags & ShaderFlags_Fast) vec3 RCA(vec3 world_point) { bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0); @@ -170,10 +172,9 @@ vec3 RCA(vec3 world_point) } #endif -#if DAS_FAST +#if (ShaderFlags & ShaderFlags_Fast) vec3 HERCULES(vec3 world_point) { - bool uhercules = shader_kind == ShaderKind_UHERCULES; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0); bool rx_cols = bool((shader_flags & ShaderFlags_RxColumns)); @@ -190,8 +191,8 @@ vec3 HERCULES(vec3 world_point) } vec2 result = vec2(0); - for (int transmit = int(uhercules); transmit < acquisition_count; transmit++) { - int tx_channel = uhercules ? imageLoad(sparse_elements, transmit - int(uhercules)).x : transmit; + for (int transmit = int(sparse); transmit < acquisition_count; transmit++) { + int tx_channel = sparse ? 
imageLoad(sparse_elements, transmit - int(sparse)).x : transmit; vec3 element_position; if (rx_cols) element_position = vec3(u_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0); else element_position = vec3(tx_channel, u_channel, 0) * vec3(xdc_element_pitch, 0); @@ -211,7 +212,6 @@ vec3 HERCULES(vec3 world_point) #else vec3 HERCULES(vec3 world_point) { - bool uhercules = shader_kind == ShaderKind_UHERCULES; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0); bool rx_cols = bool((shader_flags & ShaderFlags_RxColumns)); @@ -228,8 +228,8 @@ vec3 HERCULES(vec3 world_point) } vec3 result = vec3(0); - for (int transmit = int(uhercules); transmit < acquisition_count; transmit++) { - int tx_channel = uhercules ? imageLoad(sparse_elements, transmit - int(uhercules)).x : transmit; + for (int transmit = int(sparse); transmit < acquisition_count; transmit++) { + int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit; for (int rx_channel = 0; rx_channel < channel_count; rx_channel++) { vec3 element_position; if (rx_cols) element_position = vec3(rx_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0); @@ -251,10 +251,9 @@ vec3 HERCULES(vec3 world_point) } #endif -#if DAS_FAST +#if (ShaderFlags & ShaderFlags_Fast) vec3 FORCES(vec3 world_point) { - bool uforces = shader_kind == ShaderKind_UFORCES; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; float receive_distance = distance(xdc_world_point.xz, vec2(u_channel * xdc_element_pitch.x, 0)); float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) * @@ -262,8 +261,8 @@ vec3 FORCES(vec3 world_point) vec2 result = vec2(0); if (apodization > 0) { - for (int transmit = int(uforces); transmit < acquisition_count; transmit++) { - int tx_channel = uforces ? 
imageLoad(sparse_elements, transmit - int(uforces)).x : transmit; + for (int transmit = int(sparse); transmit < acquisition_count; transmit++) { + int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit; vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(channel_count / 2)), 0); float sidx = sample_index(distance(xdc_world_point, transmit_center) + receive_distance); @@ -275,7 +274,6 @@ vec3 FORCES(vec3 world_point) #else vec3 FORCES(vec3 world_point) { - bool uforces = shader_kind == ShaderKind_UFORCES; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; vec3 result = vec3(0); @@ -284,8 +282,8 @@ vec3 FORCES(vec3 world_point) float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) * (xdc_world_point.x - rx_channel * xdc_element_pitch.x)); if (apodization > 0) { - for (int transmit = int(uforces); transmit < acquisition_count; transmit++) { - int tx_channel = uforces ? imageLoad(sparse_elements, transmit - int(uforces)).x : transmit; + for (int transmit = int(sparse); transmit < acquisition_count; transmit++) { + int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit; vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(channel_count / 2)), 0); float sidx = sample_index(distance(xdc_world_point, transmit_center) + receive_distance); @@ -301,7 +299,7 @@ vec3 FORCES(vec3 world_point) void main() { ivec3 out_voxel = ivec3(gl_GlobalInvocationID); -#if DAS_FAST +#if (ShaderFlags & ShaderFlags_Fast) vec3 sum = vec3(imageLoad(u_out_data_tex, out_voxel).xy, 0); #else vec3 sum = vec3(0); diff --git a/shaders/decode.glsl b/shaders/decode.glsl @@ -8,19 +8,19 @@ * sample-transmit plane with the bound hadamard matrix. 
*/ -#if defined(INPUT_DATA_TYPE_FLOAT) +#if DataKind == DataKind_Float32 #define INPUT_DATA_TYPE float #define RF_SAMPLES_PER_INDEX 1 #define RESULT_TYPE_CAST(x) vec4((x), 0, 0, 0) #define SAMPLE_DATA_TYPE float #define SAMPLE_TYPE_CAST(x) (x) -#elif defined(INPUT_DATA_TYPE_FLOAT_COMPLEX) +#elif DataKind == DataKind_Float32Complex #define INPUT_DATA_TYPE vec2 #define RF_SAMPLES_PER_INDEX 1 #define RESULT_TYPE_CAST(x) vec4((x), 0, 0) #define SAMPLE_DATA_TYPE vec2 #define SAMPLE_TYPE_CAST(x) (x) -#elif defined(INPUT_DATA_TYPE_INT16_COMPLEX) +#elif DataKind == DataKind_Int16Complex #define INPUT_DATA_TYPE int #define RF_SAMPLES_PER_INDEX 1 #define RESULT_TYPE_CAST(x) vec4((x), 0, 0) diff --git a/shaders/filter.glsl b/shaders/filter.glsl @@ -1,5 +1,5 @@ /* See LICENSE for license details. */ -#if defined(INPUT_DATA_TYPE_FLOAT) +#if DataKind == DataKind_Float32 #define DATA_TYPE vec2 #define RESULT_TYPE_CAST(v) (v) #define SAMPLE_TYPE_CAST(v) (v) @@ -19,7 +19,7 @@ layout(std430, binding = 2) writeonly restrict buffer buffer_2 { layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping; -#if COMPLEX_FILTER +#if (ShaderFlags & ShaderFlags_ComplexFilter) layout(rg32f, binding = 0) readonly restrict uniform image1D filter_coefficients; #define apply_filter(iq, h) complex_mul((iq), (h).xy) #else @@ -27,6 +27,8 @@ layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping; #define apply_filter(iq, h) ((iq) * (h).x) #endif +const bool map_channels = (ShaderFlags & ShaderFlags_MapChannels) != 0; + vec2 complex_mul(vec2 a, vec2 b) { mat2 m = mat2(b.x, b.y, -b.y, b.x); @@ -34,12 +36,12 @@ vec2 complex_mul(vec2 a, vec2 b) return result; } +#if (ShaderFlags & ShaderFlags_Demodulate) vec2 rotate_iq(vec2 iq, int index) { vec2 result; - /* TODO(rnp): this doesn't give us the same performance boost as hardcoding the mode */ - switch (shader_flags & ShaderFlags_SamplingModeMask) { - case SamplingMode_NS200BW:{ + switch (SamplingMode) { + case 
SamplingMode_4X:{ // fs = 2 * fd // arg = PI * index // cos -> 1 -1 1 -1 @@ -48,7 +50,7 @@ vec2 rotate_iq(vec2 iq, int index) if (bool(index & 1)) result = mat2(-1, 0, 0, -1) * iq; else result = mat2( 1, 0, 0, 1) * iq; }break; - case SamplingMode_BS100BW:{ + case SamplingMode_2X:{ // fs = fd // arg = 2 * PI * index // cos -> 1 1 1 1 @@ -64,6 +66,7 @@ vec2 rotate_iq(vec2 iq, int index) } return result; } +#endif vec2 sample_rf(uint index) { @@ -78,7 +81,6 @@ void main() uint channel = gl_GlobalInvocationID.y; uint transmit = gl_GlobalInvocationID.z; - bool map_channels = bool(shader_flags & ShaderFlags_MapChannels); uint in_channel = map_channels ? imageLoad(channel_mapping, int(channel)).x : channel; uint in_offset = input_channel_stride * in_channel + input_transmit_stride * transmit; uint out_offset = output_channel_stride * channel + @@ -100,12 +102,12 @@ void main() int b_length = imageSize(filter_coefficients).x; int index = int(in_sample); - const float scale = bool(COMPLEX_FILTER) ? 1 : sqrt(2); + const float scale = bool(ShaderFlags & ShaderFlags_ComplexFilter) ? 
1 : sqrt(2); for (int j = max(0, index - b_length); j < min(index, a_length); j++) { vec2 iq = sample_rf(in_offset + j); vec4 h = imageLoad(filter_coefficients, index - j); - #if defined(DEMODULATE) + #if (ShaderFlags & ShaderFlags_Demodulate) result += scale * apply_filter(rotate_iq(iq * vec2(1, -1), -j), h); #else result += apply_filter(iq, h); diff --git a/static.c b/static.c @@ -74,8 +74,6 @@ debug_init(OS *os, iptr input, Arena *arena) #endif /* _DEBUG */ -#define static_path_join(a, b) (a OS_PATH_SEPARATOR b) - struct gl_debug_ctx { Stream stream; iptr os_error_handle; @@ -172,8 +170,9 @@ dump_gl_params(GLParams *gl, Arena a, OS *os) function FILE_WATCH_CALLBACK_FN(reload_shader) { - ShaderReloadContext *ctx = (typeof(ctx))user_data; - return beamformer_reload_shader(os, ctx->beamformer_context, ctx, arena, ctx->name); + ShaderReloadContext *ctx = (typeof(ctx))user_data; + BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + ctx->reloadable_info_index; + return beamformer_reload_shader(os, path, ctx, arena, beamformer_shader_names[rsi->kind]); } function FILE_WATCH_CALLBACK_FN(reload_shader_indirect) @@ -321,6 +320,9 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input Stream error = stream_alloc(memory, MB(1)); Arena ui_arena = sub_arena(memory, MB(2), KB(4)); + Arena scratch = {.beg = memory->end - 4096L, .end = memory->end}; + memory->end = scratch.beg; + BeamformerCtx *ctx = *o_ctx = push_struct(memory, typeof(*ctx)); BeamformerInput *input = *o_input = push_struct(memory, typeof(*input)); @@ -330,6 +332,7 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input input->executable_reloaded = 1; os_init(&ctx->os, memory); + ctx->os.path_separator = s8(OS_PATH_SEPARATOR); ctx->os.compute_worker.arena = compute_arena; ctx->os.compute_worker.asleep = 1; ctx->os.upload_worker.arena = upload_arena; @@ -428,21 +431,23 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, 
BeamformerInput **o_input #undef X }; - #define X(e, f, ...) do if (s8(f).len > 0) { \ - ShaderReloadContext *src = push_struct(memory, typeof(*src)); \ - src->beamformer_context = ctx; \ - src->header = compute_headers[BeamformerShaderKind_##e]; \ - src->path = s8(static_path_join("shaders", f ".glsl")); \ - src->name = src->path; \ - src->shader = cs->programs + BeamformerShaderKind_##e; \ - src->gl_type = GL_COMPUTE_SHADER; \ - src->kind = BeamformerShaderKind_##e; \ - src->link = src; \ - os_add_file_watch(&ctx->os, memory, src->path, reload_shader_indirect, (iptr)src); \ - reload_shader_indirect(&ctx->os, src->path, (iptr)src, *memory); \ - } while (0); - COMPUTE_SHADERS_INTERNAL - #undef X + for EachElement(beamformer_reloadable_compute_shader_info_indices, it) { + i32 index = beamformer_reloadable_compute_shader_info_indices[it]; + Arena temp = scratch; + + s8 file = push_s8_from_parts(&temp, s8(OS_PATH_SEPARATOR), s8("shaders"), + beamformer_reloadable_shader_files[index]); + + BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + index; + ShaderReloadContext *src = push_struct(memory, typeof(*src)); + src->beamformer_context = ctx; + src->reloadable_info_index = index; + src->link = src; + src->header = compute_headers[rsi->kind]; + src->gl_type = GL_COMPUTE_SHADER; + os_add_file_watch(&ctx->os, memory, file, reload_shader_indirect, (iptr)src); + reload_shader_indirect(&ctx->os, file, (iptr)src, *memory); + } os_wake_waiters(&worker->sync_variable); FrameViewRenderContext *fvr = &ctx->frame_view_render_context; @@ -457,13 +462,16 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24, FRAME_VIEW_RENDER_TARGET_SIZE); + static_assert(countof(beamformer_reloadable_render_shader_info_indices) == 1, + "only a single render shader is currently handled"); + i32 render_rsi_index = 
beamformer_reloadable_render_shader_info_indices[0]; + + s8 render_file = push_s8_from_parts(&scratch, s8(OS_PATH_SEPARATOR), s8("shaders"), + beamformer_reloadable_shader_files[render_rsi_index]); ShaderReloadContext *render_3d = push_struct(memory, typeof(*render_3d)); - render_3d->beamformer_context = ctx; - render_3d->path = s8(static_path_join("shaders", "render_3d.frag.glsl")); - render_3d->name = s8("shaders/render_3d.glsl"); + render_3d->beamformer_context = ctx; + render_3d->reloadable_info_index = render_rsi_index; render_3d->gl_type = GL_FRAGMENT_SHADER; - render_3d->kind = BeamformerShaderKind_Render3D; - render_3d->shader = &fvr->shader; render_3d->header = s8("" "layout(location = 0) in vec3 normal;\n" "layout(location = 1) in vec3 texture_coordinate;\n\n" @@ -480,6 +488,7 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input "layout(binding = 0) uniform sampler3D u_texture;\n"); render_3d->link = push_struct(memory, typeof(*render_3d)); + render_3d->link->reloadable_info_index = -1; render_3d->link->gl_type = GL_VERTEX_SHADER; render_3d->link->link = render_3d; render_3d->link->header = s8("" @@ -507,8 +516,8 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input "\tf_normal = v_normal;\n" "\tgl_Position = u_projection * u_view * u_model * vec4(pos, 1);\n" "}\n"); - reload_shader(&ctx->os, render_3d->path, (iptr)render_3d, *memory); - os_add_file_watch(&ctx->os, memory, render_3d->path, reload_shader, (iptr)render_3d); + reload_shader(&ctx->os, render_file, (iptr)render_3d, *memory); + os_add_file_watch(&ctx->os, memory, render_file, reload_shader, (iptr)render_3d); f32 unit_cube_vertices[] = { 0.5f, 0.5f, -0.5f, @@ -580,6 +589,8 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input cs->unit_cube_model = render_model_from_arrays(unit_cube_vertices, unit_cube_normals, sizeof(unit_cube_vertices), unit_cube_indices, countof(unit_cube_indices)); + + memory->end = 
scratch.end; } function void diff --git a/ui.c b/ui.c @@ -2644,10 +2644,6 @@ draw_compute_stats_bar_view(BeamformerUI *ui, Arena arena, ComputeShaderStats *s } } - #define X(e, s, pn, ...) [BeamformerShaderKind_##e] = s8_comp(pn ": "), - read_only local_persist s8 labels[BeamformerShaderKind_ComputeCount] = {COMPUTE_SHADERS_INTERNAL}; - #undef X - v2 result = table_extent(table, arena, ts.font); f32 remaining_width = r.size.w - result.w - table->cell_pad.w; @@ -2678,8 +2674,8 @@ draw_compute_stats_bar_view(BeamformerUI *ui, Arena arena, ComputeShaderStats *s DrawRectangleRec(rect.rl, color); if (point_in_rect(mouse, rect)) { text_pos = v2_add(rect.pos, (v2){.x = table->cell_pad.w}); - mouse_text = push_compute_time(&arena, labels[stages[i]], - stats->table.times[frame_index][stages[i]]); + s8 name = push_s8_from_parts(&arena, s8(""), beamformer_shader_names[stages[i]], s8(": ")); + mouse_text = push_compute_time(&arena, name, stats->table.times[frame_index][stages[i]]); } rect.pos.x += rect.size.w; } @@ -2707,7 +2703,7 @@ push_table_time_row(Table *table, Arena *arena, s8 label, f32 time) { assert(table->columns == 3); TableCell *cells = table_push_row(table, arena, TRK_CELLS)->data; - cells[0].text = label; + cells[0].text = push_s8_from_parts(arena, s8(""), label, s8(":")); cells[1].text = push_compute_time(arena, s8(""), time); cells[2].text = s8("[s]"); } @@ -2772,12 +2768,9 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v Table *table = table_new(&arena, 2, TextAlignment_Left, TextAlignment_Left, TextAlignment_Left); switch (csv->kind) { case ComputeStatsViewKind_Average:{ - #define X(e, n, pn, ...) 
[BeamformerShaderKind_##e] = s8_comp(pn ":"), - read_only local_persist s8 labels[BeamformerShaderKind_ComputeCount] = {COMPUTE_SHADERS_INTERNAL}; - #undef X da_reserve(&arena, table, stages); for (u32 i = 0; i < stages; i++) { - push_table_time_row(table, &arena, labels[cp->pipeline.shaders[i]], + push_table_time_row(table, &arena, beamformer_shader_names[cp->pipeline.shaders[i]], stats->average_times[cp->pipeline.shaders[i]]); } }break; diff --git a/util.c b/util.c @@ -72,11 +72,25 @@ arena_alloc(Arena *a, iz len, uz align, iz count) return result; } -enum { DA_INITIAL_CAP = 4 }; +enum { DA_INITIAL_CAP = 16 }; + +#define DA_STRUCT(kind, name) typedef struct { \ + kind *data; \ + iz count; \ + iz capacity; \ +} name ##List; + +#define da_index(it, s) ((it) - (s)->data) #define da_reserve(a, s, n) \ (s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \ _Alignof(typeof(*(s)->data)), sizeof(*(s)->data)) +#define da_append_count(a, s, items, item_count) do { \ + da_reserve((a), (s), (item_count)); \ + mem_copy((s)->data + (s)->count, (items), sizeof(*(items)) * (uz)(item_count)); \ + (s)->count += (item_count); \ +} while (0) + #define da_push(a, s) \ ((s)->count == (s)->capacity \ ? da_reserve(a, s, 1), \ @@ -198,7 +212,7 @@ function Stream stream_alloc(Arena *a, i32 cap) { Stream result = {.cap = cap}; - result.data = push_array(a, u8, cap); + result.data = arena_commit(a, cap); return result; } @@ -415,6 +429,14 @@ arena_stream_commit_zero(Arena *a, Stream *s) return result; } +function s8 +arena_stream_commit_and_reset(Arena *arena, Stream *s) +{ + s8 result = arena_stream_commit_zero(arena, s); + *s = arena_stream(*arena); + return result; +} + /* NOTE(rnp): FNV-1a hash */ function u64 s8_hash(s8 v) @@ -504,16 +526,32 @@ s8_to_s16(Arena *a, s8 in) return result; } +#define push_s8_from_parts(a, j, ...) 
push_s8_from_parts_((a), (j), arg_list(s8, __VA_ARGS__)) function s8 -push_s8(Arena *a, s8 str) +push_s8_from_parts_(Arena *arena, s8 joiner, s8 *parts, iz count) { - s8 result = s8_alloc(a, str.len); - mem_copy(result.data, str.data, (uz)result.len); + iz length = joiner.len * (count - 1); + for (iz i = 0; i < count; i++) + length += parts[i].len; + + s8 result = {.len = length, .data = arena_commit(arena, length + 1)}; + + iz offset = 0; + for (iz i = 0; i < count; i++) { + if (i != 0) { + mem_copy(result.data + offset, joiner.data, (uz)joiner.len); + offset += joiner.len; + } + mem_copy(result.data + offset, parts[i].data, (uz)parts[i].len); + offset += parts[i].len; + } + result.data[result.len] = 0; + return result; } function s8 -push_s8_zero(Arena *a, s8 str) +push_s8(Arena *a, s8 str) { s8 result = s8_alloc(a, str.len + 1); result.len -= 1; diff --git a/util.h b/util.h @@ -56,8 +56,6 @@ #define asan_unpoison_region(...) #endif -#define INVALID_CODE_PATH ASSERT(0) -#define INVALID_DEFAULT_CASE default: ASSERT(0); break #define InvalidCodePath assert(0) #define InvalidDefaultCase default: assert(0); break @@ -87,8 +85,14 @@ #define SIGN(x) ((x) < 0? 
-1 : 1) #define swap(a, b) do {typeof(a) __tmp = (a); (a) = (b); (b) = __tmp;} while(0) +#define ISDIGIT(c) (BETWEEN((c), '0', '9')) +#define ISUPPER(c) (((c) & 0x20u) == 0) +#define TOLOWER(c) (((c) | 0x20u)) +#define TOUPPER(c) (((c) & ~(0x20u))) + #define f32_cmp(x, y) (ABS((x) - (y)) <= F32_EPSILON * MAX(1.0f, MAX(ABS(x), ABS(y)))) +#define EachBit(a, it) (u64 it = ctz_u64(a); it != 64; a &= ~(1u << (it)), it = ctz_u64(a)) #define EachElement(array, it) (u64 it = 0; it < countof(array); it += 1) #define EachEnumValue(type, it) (type it = (type)0; it < type##_Count; it = (type)(it + 1)) #define EachNonZeroEnumValue(type, it) (type it = (type)1; it < type##_Count; it = (type)(it + 1)) @@ -128,6 +132,7 @@ #define GB(a) ((u64)(a) << 30ULL) #define I32_MAX (0x7FFFFFFFL) +#define U16_MAX (0x0000FFFFUL) #define U32_MAX (0xFFFFFFFFUL) #define F32_INFINITY (1e+300*1e+300) #define F32_EPSILON (1e-6f) @@ -353,6 +358,7 @@ struct OS { FileWatchContext file_watch_context; iptr context; iptr error_handle; + s8 path_separator; GLWorkerThreadContext compute_worker; GLWorkerThreadContext upload_worker; diff --git a/util_gl.c b/util_gl.c @@ -11,7 +11,7 @@ compile_shader(OS *os, Arena a, u32 type, s8 shader, s8 name) if (res == GL_FALSE) { Stream buf = arena_stream(a); - stream_append_s8s(&buf, name, s8(": failed to compile\n")); + stream_append_s8s(&buf, s8("\n"), name, s8(": failed to compile\n")); i32 len = 0, out_len = 0; glGetShaderiv(sid, GL_INFO_LOG_LENGTH, &len); @@ -63,12 +63,7 @@ load_shader(OS *os, Arena arena, s8 *shader_texts, u32 *shader_types, i32 count, if (valid) result = link_program(os, arena, ids, count); for (i32 i = 0; i < count; i++) glDeleteShader(ids[i]); - if (result) { - Stream buf = arena_stream(arena); - stream_append_s8s(&buf, s8("loaded: "), name, s8("\n")); - os_write_file(os->error_handle, stream_to_s8(&buf)); - LABEL_GL_OBJECT(GL_PROGRAM, result, name); - } + if (result) glObjectLabel(GL_PROGRAM, result, (i32)name.len, (c8 *)name.data); return 
result; }