Commit: 47d14ce3869cf38781e912453cd7c86286c8c68e
Parent: 00832c3949982d9ef4e717cba080629c50f91610
Author: Randy Palamar
Date: Wed, 3 Sep 2025 13:35:01 -0600
build: add meta program
Currently this handles:
- generating a list of shader variations with baked parameters
  - these are measured to be more performant than behaviour modification
    through values specified at runtime
- a set of enums that are required by the shaders
  - some of which are global and used below
- MATLAB bindings based on global enums
- modified helper library header based on global enums
No optimization was performed on the "compiler" so it may get slow
as the meta file expands. A fair amount of work was done to ensure
that information that is shared between shader variations is not
duplicated over and over.
The generated C code is committed so that it is clear what parts
of the actual compiled program have changed when something in the
meta file or "compiler" is modified.
Diffstat:
20 files changed, 2322 insertions(+), 554 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,9 +1,11 @@
*
!.github
-!tests
!external
+!generated
!helpers
!shaders
+!tests
!*.c
!*.glsl
!*.h
+!*.meta
diff --git a/beamformer.c b/beamformer.c
@@ -1,9 +1,5 @@
/* See LICENSE for license details. */
/* TODO(rnp):
- * [ ]: filter shader specializations need to be generated per sample mode
- * - performance was measured with a switch on sampling mode and the perfomance gained
- * is 80% worse than just having a baked in sampling mode
- * - should also include channel mapping just in case
* [ ]: make decode output real values for real inputs and complex values for complex inputs
* - this means that das should have a RF version and an IQ version
* - this will also flip the current hack to support demodulate after decode to
@@ -469,48 +465,61 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
u32 shader = pb->pipeline.shaders[i];
b32 commit = 0;
+ iz match = 0;
switch (shader) {
case BeamformerShaderKind_CudaHilbert:{ commit = run_cuda_hilbert; }break;
case BeamformerShaderKind_Decode:{
- BeamformerShaderKind decode_table[] = {
- [BeamformerDataKind_Int16] = BeamformerShaderKind_Decode,
- [BeamformerDataKind_Int16Complex] = BeamformerShaderKind_DecodeInt16Complex,
- [BeamformerDataKind_Float32] = BeamformerShaderKind_DecodeFloat,
- [BeamformerDataKind_Float32Complex] = BeamformerShaderKind_DecodeFloatComplex,
- };
- if (decode_first && demodulate) {
- /* TODO(rnp): for now we assume that if we are demodulating the data is int16 */
- shader = BeamformerShaderKind_DecodeInt16ToFloat;
- } else if (decode_first) {
- shader = decode_table[CLAMP(data_kind, 0, countof(decode_table) - 1)];
- } else {
- if (data_kind == BeamformerDataKind_Int16)
- shader = BeamformerShaderKind_DecodeInt16Complex;
- else
- shader = BeamformerShaderKind_DecodeFloatComplex;
+ /* TODO(rnp): rework decode first and demodulate after */
+ BeamformerDataKind decode_data_kind = data_kind;
+ if (!decode_first) {
+ if (data_kind == BeamformerDataKind_Int16) {
+ decode_data_kind = BeamformerDataKind_Int16Complex;
+ } else {
+ decode_data_kind = BeamformerDataKind_Float32Complex;
+ }
}
+ match = beamformer_shader_decode_match(decode_data_kind);
commit = 1;
}break;
case BeamformerShaderKind_Demodulate:{
BeamformerFilter *f = cp->filters + sp->filter_slot;
- if (decode_first || (!decode_first && data_kind == BeamformerDataKind_Float32)) {
- if (f->parameters.complex) shader = BeamformerShaderKind_DemodulateFloatCF;
- else shader = BeamformerShaderKind_DemodulateFloat;
- } else if (f->parameters.complex) {
- shader = BeamformerShaderKind_DemodulateCF;
- }
+ i32 local_flags = BeamformerShaderFilterFlags_Demodulate;
+ if (f->parameters.complex) local_flags |= BeamformerShaderFilterFlags_ComplexFilter;
+ if (!decode_first) local_flags |= BeamformerShaderFilterFlags_MapChannels;
+
+ BeamformerDataKind filter_data_kind = data_kind;
+ if (decode_first)
+ filter_data_kind = BeamformerDataKind_Float32;
+
+ match = beamformer_shader_demodulate_match(filter_data_kind, pb->parameters.sampling_mode, local_flags);
+
bp->time_offset += f->time_delay;
commit = 1;
}break;
case BeamformerShaderKind_Filter:{
BeamformerFilter *f = cp->filters + sp->filter_slot;
- if (f->parameters.complex) shader = BeamformerShaderKind_FilterCF;
+ i32 local_flags = 0;
+ if (f->parameters.complex) local_flags |= BeamformerShaderFilterFlags_ComplexFilter;
+
+ BeamformerDataKind filter_data_kind = data_kind;
+ if (decode_first)
+ filter_data_kind = BeamformerDataKind_Float32;
+
+ match = beamformer_shader_filter_match(filter_data_kind, local_flags);
bp->time_offset += f->time_delay;
commit = 1;
}break;
case BeamformerShaderKind_DAS:{
+ BeamformerDataKind das_data_kind = BeamformerDataKind_Float32;
+ if (demodulate || run_cuda_hilbert)
+ das_data_kind = BeamformerDataKind_Float32Complex;
+
+ i32 local_flags = 0;
if ((bp->shader_flags & DASShaderFlags_CoherencyWeighting) == 0)
- shader = BeamformerShaderKind_DASFast;
+ local_flags |= BeamformerShaderDASFlags_Fast;
+ if (bp->shader_kind == DASShaderKind_UFORCES || bp->shader_kind == DASShaderKind_UHERCULES)
+ local_flags |= BeamformerShaderDASFlags_Sparse;
+ match = beamformer_shader_das_match(das_data_kind, local_flags);
commit = 1;
}break;
default:{ commit = 1; }break;
@@ -518,8 +527,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
if (commit) {
u32 index = cp->pipeline.shader_count++;
- cp->pipeline.shaders[index] = shader;
- cp->pipeline.parameters[index] = *sp;
+ cp->pipeline.shaders[index] = shader;
+ cp->pipeline.program_indices[index] = (u32)match;
+ cp->pipeline.parameters[index] = *sp;
}
}
cp->pipeline.data_kind = data_kind;
@@ -578,8 +588,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
mp->sampling_frequency = bp->sampling_frequency / 2;
mp->decimation_rate = decimation_rate;
- if (!decode_first) mp->shader_flags |= FilterShaderFlags_MapChannels;
-
bp->sampling_frequency /= 2 * (f32)mp->decimation_rate;
bp->sample_count /= 2 * mp->decimation_rate;
@@ -618,7 +626,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
flt->demodulation_frequency = bp->demodulation_frequency;
flt->sampling_frequency = bp->sampling_frequency;
flt->decimation_rate = 1;
- flt->shader_flags = pb->parameters.sampling_mode & FilterShaderFlags_SamplingModeMask;
flt->output_channel_stride = bp->sample_count * bp->acquisition_count;
flt->output_sample_stride = 1;
flt->output_transmit_stride = bp->sample_count;
@@ -713,23 +720,21 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp,
function void
do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame *frame,
- BeamformerShaderKind shader, BeamformerShaderParameters *sp, Arena arena)
+ BeamformerShaderKind shader, u32 program_index, BeamformerShaderParameters *sp, Arena arena)
{
BeamformerComputeContext *cc = &ctx->compute_context;
- u32 program = cc->programs[shader];
+ i32 *match_vector = beamformer_shader_match_vectors[program_index];
+ BeamformerShaderDescriptor *shader_descriptor = beamformer_shader_descriptors + shader;
+
+ u32 program = cc->programs[program_index];
glUseProgram(program);
u32 output_ssbo_idx = !cc->last_output_ssbo_index;
u32 input_ssbo_idx = cc->last_output_ssbo_index;
switch (shader) {
- case BeamformerShaderKind_Decode:
- case BeamformerShaderKind_DecodeInt16Complex:
- case BeamformerShaderKind_DecodeFloat:
- case BeamformerShaderKind_DecodeFloatComplex:
- case BeamformerShaderKind_DecodeInt16ToFloat:
- {
+ case BeamformerShaderKind_Decode:{
glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[BeamformerComputeUBOKind_Decode]);
glBindImageTexture(0, cp->textures[BeamformerComputeTextureKind_Hadamard], 0, 0, 0, GL_READ_ONLY, GL_R8I);
@@ -761,26 +766,23 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
cc->last_output_ssbo_index = !cc->last_output_ssbo_index;
}break;
case BeamformerShaderKind_Filter:
- case BeamformerShaderKind_FilterCF:
case BeamformerShaderKind_Demodulate:
- case BeamformerShaderKind_DemodulateCF:
- case BeamformerShaderKind_DemodulateFloat:
- case BeamformerShaderKind_DemodulateFloatCF:
{
- BeamformerFilterUBO *ubo = &cp->demod_ubo_data;
- if (shader == BeamformerShaderKind_Filter)
- ubo = &cp->filter_ubo_data;
+ i32 local_flags = match_vector[shader_descriptor->match_vector_length];
+ b32 map_channels = (local_flags & BeamformerShaderFilterFlags_MapChannels) != 0;
u32 index = shader == BeamformerShaderKind_Filter ? BeamformerComputeUBOKind_Filter
: BeamformerComputeUBOKind_Demodulate;
glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[index]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[output_ssbo_idx]);
- if ((ubo->shader_flags & FilterShaderFlags_MapChannels) == 0)
+
+ if (!map_channels)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx]);
GLenum kind = cp->filters[sp->filter_slot].parameters.complex? GL_RG32F : GL_R32F;
glBindImageTexture(0, cp->filters[sp->filter_slot].texture, 0, 0, 0, GL_READ_ONLY, kind);
- if (ubo->shader_flags & FilterShaderFlags_MapChannels)
+
+ if (map_channels)
glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_ChannelMapping], 0, 0, 0, GL_READ_ONLY, GL_R16I);
glDispatchCompute(cp->demod_dispatch.x, cp->demod_dispatch.y, cp->demod_dispatch.z);
@@ -801,11 +803,14 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
}break;
- case BeamformerShaderKind_DAS:
- case BeamformerShaderKind_DASFast:
- {
+ case BeamformerShaderKind_DAS:{
BeamformerDASUBO *ubo = &cp->das_ubo_data;
- if (shader == BeamformerShaderKind_DASFast) {
+
+ i32 local_flags = match_vector[shader_descriptor->match_vector_length];
+ b32 fast = (local_flags & BeamformerShaderDASFlags_Fast) != 0;
+ b32 sparse = (local_flags & BeamformerShaderDASFlags_Sparse) != 0;
+
+ if (fast) {
glClearTexImage(frame->texture, 0, GL_RED, GL_FLOAT, 0);
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F);
@@ -813,14 +818,17 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
}
+ u32 sparse_texture = cp->textures[BeamformerComputeTextureKind_SparseElements];
+ if (!sparse) sparse_texture = 0;
+
glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[BeamformerComputeUBOKind_DAS]);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx], 0, cp->rf_size);
- glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_SparseElements], 0, 0, 0, GL_READ_ONLY, GL_R16I);
- glBindImageTexture(2, cp->textures[BeamformerComputeTextureKind_FocalVectors], 0, 0, 0, GL_READ_ONLY, GL_RG32F);
+ glBindImageTexture(1, sparse_texture, 0, 0, 0, GL_READ_ONLY, GL_R16I);
+ glBindImageTexture(2, cp->textures[BeamformerComputeTextureKind_FocalVectors], 0, 0, 0, GL_READ_ONLY, GL_RG32F);
glProgramUniform1ui(program, DAS_CYCLE_T_UNIFORM_LOC, cycle_t++);
- if (shader == BeamformerShaderKind_DASFast) {
+ if (fast) {
i32 loop_end;
if (ubo->shader_kind == DASShaderKind_RCA_VLS ||
ubo->shader_kind == DASShaderKind_RCA_TPW)
@@ -838,9 +846,9 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
/* IMPORTANT(rnp): prevents OS from coalescing and killing our shader */
glFinish();
glProgramUniform1i(program, DAS_FAST_CHANNEL_UNIFORM_LOC, index);
- glDispatchCompute((u32)ceil_f32((f32)frame->dim.x / DAS_FAST_LOCAL_SIZE_X),
- (u32)ceil_f32((f32)frame->dim.y / DAS_FAST_LOCAL_SIZE_Y),
- (u32)ceil_f32((f32)frame->dim.z / DAS_FAST_LOCAL_SIZE_Z));
+ glDispatchCompute((u32)ceil_f32((f32)frame->dim.x / DAS_LOCAL_SIZE_X),
+ (u32)ceil_f32((f32)frame->dim.y / DAS_LOCAL_SIZE_Y),
+ (u32)ceil_f32((f32)frame->dim.z / DAS_LOCAL_SIZE_Z));
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
} else {
@@ -902,118 +910,41 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
}
}
-function s8
-shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena)
+function void
+stream_push_shader_header(Stream *s, ShaderReloadContext *ctx)
{
- Stream sb = arena_stream(*arena);
- stream_append_s8s(&sb, s8("#version 460 core\n\n"), ctx->header);
+ BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + ctx->reloadable_info_index;
- switch (ctx->kind) {
- case BeamformerShaderKind_Filter:
- case BeamformerShaderKind_FilterCF:
- case BeamformerShaderKind_Demodulate:
- case BeamformerShaderKind_DemodulateCF:
- case BeamformerShaderKind_DemodulateFloat:
- case BeamformerShaderKind_DemodulateFloatCF:
- {
- stream_append_s8(&sb, s8(""
+ stream_append_s8s(s, s8("#version 460 core\n\n"), ctx->header);
+
+ switch (rsi->kind) {
+ case BeamformerShaderKind_Filter:{
+ stream_append_s8(s, s8(""
"layout(local_size_x = " str(FILTER_LOCAL_SIZE_X) ", "
"local_size_y = " str(FILTER_LOCAL_SIZE_Y) ", "
"local_size_z = " str(FILTER_LOCAL_SIZE_Z) ") in;\n\n"
));
-
- switch (ctx->kind) {
- case BeamformerShaderKind_FilterCF:
- case BeamformerShaderKind_DemodulateCF:
- case BeamformerShaderKind_DemodulateFloatCF:
- {
- stream_append_s8(&sb, s8("#define COMPLEX_FILTER 1\n"));
- }break;
- default:{
- stream_append_s8(&sb, s8("#define COMPLEX_FILTER 0\n"));
- }break;
- }
-
- switch (ctx->kind) {
- case BeamformerShaderKind_Filter:
- case BeamformerShaderKind_FilterCF:
- case BeamformerShaderKind_DemodulateFloat:
- case BeamformerShaderKind_DemodulateFloatCF:
- {
- stream_append_s8(&sb, s8("#define INPUT_DATA_TYPE_FLOAT\n"));
- }break;
- default:{}break;
- }
-
- switch (ctx->kind) {
- case BeamformerShaderKind_Demodulate:
- case BeamformerShaderKind_DemodulateCF:
- case BeamformerShaderKind_DemodulateFloat:
- case BeamformerShaderKind_DemodulateFloatCF:
- {
- stream_append_s8(&sb, s8("#define DEMODULATE\n"));
- }break;
- default:{}break;
- }
-
- stream_append_byte(&sb, '\n');
- #define X(k, v, ...) "#define ShaderFlags_" #k " " #v "\n"
- stream_append_s8(&sb, s8(FILTER_SHADER_FLAGS_LIST));
- #undef X
-
- stream_append_byte(&sb, '\n');
- #define X(k, v, ...) "#define SamplingMode_" #k " " #v "\n"
- stream_append_s8(&sb, s8(SAMPLING_MODES_LIST));
- #undef X
- stream_append_byte(&sb, '\n');
}break;
- case BeamformerShaderKind_DAS:
- case BeamformerShaderKind_DASFast:
- {
- if (ctx->kind == BeamformerShaderKind_DAS) {
- stream_append_s8(&sb, s8(""
- "layout(local_size_x = " str(DAS_LOCAL_SIZE_X) ", "
- "local_size_y = " str(DAS_LOCAL_SIZE_Y) ", "
- "local_size_z = " str(DAS_LOCAL_SIZE_Z) ") in;\n\n"
- "#define DAS_FAST 0\n\n"
- "layout(location = " str(DAS_VOXEL_OFFSET_UNIFORM_LOC) ") uniform ivec3 u_voxel_offset;\n"
- ));
- } else {
- stream_append_s8(&sb, s8(""
- "layout(local_size_x = " str(DAS_FAST_LOCAL_SIZE_X) ", "
- "local_size_y = " str(DAS_FAST_LOCAL_SIZE_Y) ", "
- "local_size_z = " str(DAS_FAST_LOCAL_SIZE_Z) ") in;\n\n"
- "#define DAS_FAST 1\n\n"
- "layout(location = " str(DAS_FAST_CHANNEL_UNIFORM_LOC) ") uniform int u_channel;\n"
- ));
- }
- stream_append_s8(&sb, s8(""
- "layout(location = " str(DAS_CYCLE_T_UNIFORM_LOC) ") uniform uint u_cycle_t;\n\n"));
+ case BeamformerShaderKind_DAS:{
+ stream_append_s8(s, s8(""
+ "layout(local_size_x = " str(DAS_LOCAL_SIZE_X) ", "
+ "local_size_y = " str(DAS_LOCAL_SIZE_Y) ", "
+ "local_size_z = " str(DAS_LOCAL_SIZE_Z) ") in;\n\n"
+ "layout(location = " str(DAS_VOXEL_OFFSET_UNIFORM_LOC) ") uniform ivec3 u_voxel_offset;\n"
+ "layout(location = " str(DAS_CYCLE_T_UNIFORM_LOC) ") uniform uint u_cycle_t;\n"
+ "layout(location = " str(DAS_FAST_CHANNEL_UNIFORM_LOC) ") uniform int u_channel;\n\n"
+ ));
- #define X(k, v, ...) "#define ShaderFlags_" #k " " #v "\n"
- stream_append_s8(&sb, s8(DAS_SHADER_FLAGS_LIST));
+ #define X(k, id, ...) "#define ShaderFlags_" #k " " #id "\n"
+ stream_append_s8s(s, s8(DAS_SHADER_FLAGS_LIST), s8("\n"));
#undef X
-
- stream_append_byte(&sb, '\n');
-
#define X(k, id, ...) "#define ShaderKind_" #k " " #id "\n"
- stream_append_s8(&sb, s8(DAS_SHADER_KIND_LIST));
+ stream_append_s8s(s, s8(DAS_SHADER_KIND_LIST), s8("\n"));
#undef X
}break;
- case BeamformerShaderKind_Decode:
- case BeamformerShaderKind_DecodeFloat:
- case BeamformerShaderKind_DecodeFloatComplex:
- case BeamformerShaderKind_DecodeInt16Complex:
- case BeamformerShaderKind_DecodeInt16ToFloat:
- {
- s8 define_table[] = {
- [BeamformerShaderKind_DecodeFloatComplex] = s8("#define INPUT_DATA_TYPE_FLOAT_COMPLEX\n\n"),
- [BeamformerShaderKind_DecodeFloat] = s8("#define INPUT_DATA_TYPE_FLOAT\n\n"),
- [BeamformerShaderKind_DecodeInt16Complex] = s8("#define INPUT_DATA_TYPE_INT16_COMPLEX\n\n"),
- [BeamformerShaderKind_DecodeInt16ToFloat] = s8("#define OUTPUT_DATA_TYPE_FLOAT\n\n"),
- };
+ case BeamformerShaderKind_Decode:{
#define X(type, id, pretty) "#define DECODE_MODE_" #type " " #id "\n"
- stream_append_s8s(&sb, define_table[ctx->kind], s8(""
+ stream_append_s8s(s, s8(""
"layout(local_size_x = " str(DECODE_LOCAL_SIZE_X) ", "
"local_size_y = " str(DECODE_LOCAL_SIZE_Y) ", "
"local_size_z = " str(DECODE_LOCAL_SIZE_Z) ") in;\n\n"
@@ -1023,20 +954,27 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena)
#undef X
}break;
case BeamformerShaderKind_MinMax:{
- stream_append_s8(&sb, s8("layout(location = " str(MIN_MAX_MIPS_LEVEL_UNIFORM_LOC)
- ") uniform int u_mip_map;\n\n"));
+ stream_append_s8(s, s8("layout(location = " str(MIN_MAX_MIPS_LEVEL_UNIFORM_LOC)
+ ") uniform int u_mip_map;\n\n"));
}break;
case BeamformerShaderKind_Sum:{
- stream_append_s8(&sb, s8("layout(location = " str(SUM_PRESCALE_UNIFORM_LOC)
- ") uniform float u_sum_prescale = 1.0;\n\n"));
+ stream_append_s8(s, s8("layout(location = " str(SUM_PRESCALE_UNIFORM_LOC)
+ ") uniform float u_sum_prescale = 1.0;\n\n"));
}break;
default:{}break;
}
+}
+
+function s8
+shader_text_with_header(ShaderReloadContext *ctx, s8 filepath, Arena *arena)
+{
+ Stream sb = arena_stream(*arena);
+ stream_push_shader_header(&sb, ctx);
stream_append_s8(&sb, s8("\n#line 1\n"));
s8 result = arena_stream_commit(arena, &sb);
- if (ctx->path.len) {
- s8 file = os_read_whole_file(arena, (c8 *)ctx->path.data);
+ if (filepath.len > 0) {
+ s8 file = os_read_whole_file(arena, (c8 *)filepath.data);
assert(file.data == result.data + result.len);
result.len += file.len;
}
@@ -1046,6 +984,8 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena)
DEBUG_EXPORT BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader)
{
+ BeamformerCtx *ctx = src->beamformer_context;
+
i32 shader_count = 1;
ShaderReloadContext *link = src->link;
while (link != src) { shader_count++; link = link->link; }
@@ -1055,27 +995,105 @@ DEBUG_EXPORT BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader)
i32 index = 0;
do {
- shader_texts[index] = shader_text_with_header(link, os, &arena);
+ s8 filepath = {0};
+ if (link->reloadable_info_index >= 0) filepath = path;
+ shader_texts[index] = shader_text_with_header(link, filepath, &arena);
shader_types[index] = link->gl_type;
index++;
link = link->link;
} while (link != src);
- glDeleteProgram(*src->shader);
- *src->shader = load_shader(&ctx->os, arena, shader_texts, shader_types, shader_count, shader_name);
- if (src->kind == BeamformerShaderKind_Render3D) ctx->frame_view_render_context.updated = 1;
+ BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + src->reloadable_info_index;
+ u32 *shader = ctx->compute_context.programs + rsi->kind;
+ if (rsi->kind == BeamformerShaderKind_Render3D)
+ shader = &ctx->frame_view_render_context.shader;
+
+ glDeleteProgram(*shader);
+ *shader = load_shader(&ctx->os, arena, shader_texts, shader_types, shader_count, shader_name);
+ if (rsi->kind == BeamformerShaderKind_Render3D) ctx->frame_view_render_context.updated = 1;
return 1;
}
-function b32
-reload_compute_shader(BeamformerCtx *ctx, ShaderReloadContext *src, s8 name_extra, Arena arena)
+function void
+reload_compute_shader(BeamformerCtx *ctx, ShaderReloadContext *src, Arena arena)
{
- Stream sb = arena_stream(arena);
- stream_append_s8s(&sb, src->name, name_extra);
- s8 name = arena_stream_commit(&arena, &sb);
- b32 result = beamformer_reload_shader(&ctx->os, ctx, src, arena, name);
- return result;
+ BeamformerComputeContext *cc = &ctx->compute_context;
+ BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + src->reloadable_info_index;
+ BeamformerShaderDescriptor *sd = beamformer_shader_descriptors + rsi->kind;
+
+ Stream status = stream_alloc(&arena, 128);
+ u32 completed = 0;
+ u32 total_shaders = (u32)(sd->one_past_last_match_vector_index - sd->first_match_vector_index);
+ for (i32 i = 0; i < rsi->sub_shader_descriptor_index_count; i++) {
+ BeamformerShaderDescriptor *ssd = beamformer_shader_descriptors + rsi->sub_shader_descriptor_indices[i];
+ total_shaders += (u32)(ssd->one_past_last_match_vector_index - ssd->first_match_vector_index);
+ }
+
+ s8 path = push_s8_from_parts(&arena, ctx->os.path_separator, s8("shaders"),
+ beamformer_reloadable_shader_files[src->reloadable_info_index]);
+ s8 file_text = os_read_whole_file(&arena, (c8 *)path.data);
+ Stream shader = arena_stream(arena);
+
+ stream_push_shader_header(&shader, src);
+
+ stream_append_s8(&shader, beamformer_shader_local_header_strings[src->reloadable_info_index]);
+
+ i32 save_point = shader.widx;
+ for (i32 sub_index = -1; sub_index < rsi->sub_shader_descriptor_index_count; sub_index++) {
+ shader.widx = save_point;
+
+ if (sub_index != -1)
+ sd = beamformer_shader_descriptors + rsi->sub_shader_descriptor_indices[sub_index];
+
+ i32 *hvector = beamformer_shader_header_vectors[sd - beamformer_shader_descriptors];
+ for (i32 index = 0; index < sd->match_vector_length; index++)
+ stream_append_s8s(&shader, beamformer_shader_global_header_strings[hvector[index]], s8("\n"));
+
+ i32 instance_save_point = shader.widx;
+ arena_commit(&arena, instance_save_point);
+ TempArena arena_save = begin_temp_arena(&arena);
+
+ for (i32 instance = sd->first_match_vector_index;
+ instance < sd->one_past_last_match_vector_index;
+ instance++)
+ {
+ shader.widx = instance_save_point;
+ end_temp_arena(arena_save);
+
+ i32 *match_vector = beamformer_shader_match_vectors[instance];
+ for (i32 index = 0; index < sd->match_vector_length; index++) {
+ stream_append_s8s(&shader, s8("#define "), beamformer_shader_descriptor_header_strings[index], s8(" ("));
+ stream_append_i64(&shader, match_vector[index]);
+ stream_append_s8(&shader, s8(")\n"));
+ }
+
+ if (sd->has_local_flags) {
+ stream_append_s8(&shader, s8("#define ShaderFlags (0x"));
+ stream_append_hex_u64(&shader, (u64)match_vector[sd->match_vector_length]);
+ stream_append_s8(&shader, s8(")\n"));
+ }
+
+ stream_append_s8s(&shader, s8("\n#line 1\n"), file_text);
+
+ arena_commit(&arena, shader.widx - instance_save_point);
+
+ s8 shader_text = stream_to_s8(&shader);
+ /* TODO(rnp): instance name */
+ s8 shader_name = beamformer_shader_names[rsi->kind];
+ glDeleteProgram(cc->programs[instance]);
+ cc->programs[instance] = load_shader(&ctx->os, arena, &shader_text, &src->gl_type, 1, shader_name);
+
+ status.widx = 0;
+ stream_append_s8s(&status, s8("\r\x1b[2Kloaded shader "), shader_name, s8(": ["));
+ stream_append_u64(&status, ++completed);
+ stream_append_s8s(&status, s8("/"));
+ stream_append_u64(&status, total_shaders);
+ stream_append_s8s(&status, s8("]"));
+ os_write_file(ctx->os.error_handle, stream_to_s8(&status));
+ }
+ }
+ os_write_file(ctx->os.error_handle, s8("\n"));
}
function void
@@ -1089,51 +1107,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
b32 can_commit = 1;
switch (work->kind) {
case BeamformerWorkKind_ReloadShader:{
- ShaderReloadContext *src = work->shader_reload_context;
- b32 success = reload_compute_shader(ctx, src, s8(""), *arena);
- /* TODO(rnp): think of a better way of doing this */
- switch (src->kind) {
- case BeamformerShaderKind_DAS:{
- src->kind = BeamformerShaderKind_DASFast;
- src->shader = cs->programs + src->kind;
- success &= reload_compute_shader(ctx, src, s8(" (Fast)"), *arena);
-
- src->kind = BeamformerShaderKind_DAS;
- src->shader = cs->programs + src->kind;
- }break;
- case BeamformerShaderKind_Decode:{
- read_only local_persist struct { BeamformerShaderKind kind; s8 suffix; } derivatives[] = {
- #define X(k, __1, __2, suffix, ...) {BeamformerShaderKind_## k, s8_comp(suffix)},
- DECODE_SHADER_VARIATIONS
- #undef X
- };
- for EachElement(derivatives, it) {
- src->kind = derivatives[it].kind;
- src->shader = cs->programs + src->kind;
- success &= reload_compute_shader(ctx, src, derivatives[it].suffix, *arena);
- }
- src->kind = BeamformerShaderKind_Decode;
- src->shader = cs->programs + src->kind;
- }break;
- case BeamformerShaderKind_Filter:{
- read_only local_persist struct { BeamformerShaderKind kind; s8 suffix; } derivatives[] = {
- {BeamformerShaderKind_Demodulate, s8_comp(" (Demodulate)")},
- #define X(k, __1, __2, suffix, ...) {BeamformerShaderKind_## k, s8_comp(suffix)},
- FILTER_SHADER_VARIATIONS
- #undef X
- };
- for EachElement(derivatives, it) {
- src->kind = derivatives[it].kind;
- src->shader = cs->programs + src->kind;
- success &= reload_compute_shader(ctx, src, derivatives[it].suffix, *arena);
- }
- src->kind = BeamformerShaderKind_Filter;
- src->shader = cs->programs + src->kind;
- }break;
- default:{}break;
- }
-
- if (success && ctx->latest_frame && !sm->live_imaging_parameters.active) {
+ reload_compute_shader(ctx, work->shader_reload_context, *arena);
+ if (ctx->latest_frame && !sm->live_imaging_parameters.active) {
fill_frame_compute_work(ctx, work, ctx->latest_frame->view_plane_tag, 0, 0);
can_commit = 0;
}
@@ -1235,7 +1210,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, rf->ssbo, slot * rf->size, rf->size);
glBeginQuery(GL_TIME_ELAPSED, cc->shader_timer_ids[0]);
- do_compute_shader(ctx, cp, frame, pipeline->shaders[0], pipeline->parameters + 0, *arena);
+ do_compute_shader(ctx, cp, frame, pipeline->shaders[0], pipeline->program_indices[0],
+ pipeline->parameters + 0, *arena);
glEndQuery(GL_TIME_ELAPSED);
if (work->kind == BeamformerWorkKind_ComputeIndirect) {
@@ -1249,7 +1225,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
for (u32 i = 1; i < pipeline->shader_count; i++) {
did_sum_shader |= pipeline->shaders[i] == BeamformerShaderKind_Sum;
glBeginQuery(GL_TIME_ELAPSED, cc->shader_timer_ids[i]);
- do_compute_shader(ctx, cp, frame, pipeline->shaders[i], pipeline->parameters + i, *arena);
+ do_compute_shader(ctx, cp, frame, pipeline->shaders[i], pipeline->program_indices[i],
+ pipeline->parameters + i, *arena);
glEndQuery(GL_TIME_ELAPSED);
}
diff --git a/beamformer.h b/beamformer.h
@@ -6,6 +6,9 @@
#include <rlgl.h>
#include "util.h"
+#include "opengl.h"
+
+#include "generated/beamformer.meta.c"
///////////////////
// REQUIRED OS API
@@ -15,7 +18,6 @@ function OS_SHARED_MEMORY_UNLOCK_REGION_FN(os_shared_memory_region_unlock);
function OS_WAKE_WAITERS_FN(os_wake_waiters);
function OS_WRITE_FILE_FN(os_write_file);
-#include "opengl.h"
#include "util_gl.c"
enum gl_vendor_ids {
@@ -106,23 +108,15 @@ typedef struct {
} BeamformerFilter;
#define DAS_SHADER_FLAGS_LIST \
- X(RxColumns, (1 << 0)) \
- X(TxColumns, (1 << 1)) \
- X(Interpolate, (1 << 2)) \
- X(CoherencyWeighting, (1 << 3))
+ X(RxColumns, (1 << 2)) \
+ X(TxColumns, (1 << 3)) \
+ X(Interpolate, (1 << 4)) \
+ X(CoherencyWeighting, (1 << 5))
#define X(k, v, ...) DASShaderFlags_## k = v,
typedef enum {DAS_SHADER_FLAGS_LIST} DASShaderFlags;
#undef X
-static_assert(BeamformerSamplingMode_Count < 4, "filter sample mode mask borked");
-#define FILTER_SHADER_FLAGS_LIST \
- X(SamplingModeMask, ((1 << 0) | (1 << 1))) \
- X(MapChannels, (1 << 2))
-#define X(k, v, ...) FilterShaderFlags_## k = v,
-typedef enum {FILTER_SHADER_FLAGS_LIST} FilterShaderFlags;
-#undef X
-
/* X(name, type, gltype) */
#define BEAMFORMER_FILTER_UBO_PARAM_LIST \
X(input_channel_stride, u32, uint) \
@@ -132,7 +126,6 @@ typedef enum {FILTER_SHADER_FLAGS_LIST} FilterShaderFlags;
X(output_sample_stride, u32, uint) \
X(output_transmit_stride, u32, uint) \
X(decimation_rate, u32, uint) \
- X(shader_flags, u32, int) \
X(demodulation_frequency, f32, float) \
X(sampling_frequency, f32, float)
@@ -174,7 +167,7 @@ typedef alignas(16) struct {
#define X(name, type, ...) type name;
BEAMFORMER_FILTER_UBO_PARAM_LIST
#undef X
- float _pad[2];
+ float _pad[3];
} BeamformerFilterUBO;
static_assert((sizeof(BeamformerFilterUBO) & 15) == 0, "UBO size must be a multiple of 16");
@@ -253,7 +246,8 @@ typedef struct {
} BeamformerRFBuffer;
typedef struct {
- u32 programs[BeamformerShaderKind_ComputeCount];
+ /* TODO(rnp): slightly oversized; remove non compute shaders from match vectors count */
+ u32 programs[beamformer_match_vectors_count];
BeamformerRFBuffer rf_buffer;
BeamformerComputePlan *compute_plans[BeamformerMaxParameterBlockSlots];
@@ -400,14 +394,11 @@ typedef struct {
} BeamformerCtx;
struct ShaderReloadContext {
- BeamformerCtx *beamformer_context;
- s8 path;
- s8 name;
- s8 header;
- u32 *shader;
+ BeamformerCtx *beamformer_context;
ShaderReloadContext *link;
- GLenum gl_type;
- BeamformerShaderKind kind;
+ s8 header;
+ GLenum gl_type;
+ i32 reloadable_info_index;
};
#define BEAMFORMER_FRAME_STEP_FN(name) void name(BeamformerCtx *ctx, BeamformerInput *input)
@@ -419,8 +410,8 @@ typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn);
#define BEAMFORMER_RF_UPLOAD_FN(name) void name(BeamformerUploadThreadContext *ctx, Arena arena)
typedef BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload_fn);
-#define BEAMFORMER_RELOAD_SHADER_FN(name) b32 name(OS *os, BeamformerCtx *ctx, \
- ShaderReloadContext *src, Arena arena, s8 shader_name)
+#define BEAMFORMER_RELOAD_SHADER_FN(name) b32 name(OS *os, s8 path, ShaderReloadContext *src, \
+ Arena arena, s8 shader_name)
typedef BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader_fn);
#define BEAMFORMER_DEBUG_UI_DEINIT_FN(name) void name(BeamformerCtx *ctx)
diff --git a/beamformer.meta b/beamformer.meta
@@ -0,0 +1,45 @@
+@ShaderGroup Compute
+{
+ @Shader CudaDecode
+ @Shader CudaHilbert
+
+ @Shader(decode.glsl) Decode
+ {
+ @Permute(DataKind [Int16 Int16Complex Float32 Float32Complex])
+ }
+
+ @Shader(filter.glsl) Filter
+ {
+ @Permute(DataKind [Int16Complex Float32 Float32Complex])
+ {
+ @PermuteBits([MapChannels ComplexFilter])
+ }
+
+ @SubShader Demodulate
+ {
+ @Permute(DataKind [Int16 Float32])
+ {
+ @Permute(SamplingMode [2X 4X])
+ {
+ @PermuteBits([MapChannels ComplexFilter])
+ }
+ }
+ }
+ }
+
+ @Shader(das.glsl) DAS
+ {
+ @Permute(DataKind [Float32 Float32Complex])
+ {
+ @PermuteBits([Fast Sparse])
+ }
+ }
+
+ @Shader(min_max.glsl) MinMax
+ @Shader(sum.glsl) Sum
+}
+
+@ShaderGroup Render
+{
+ @Shader(render_3d.frag.glsl) Render3D
+}
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -10,49 +10,10 @@
* be organized for simple offset access per frame).
*/
-/* X(enumarant, shader file name, pretty name) */
-#define COMPUTE_SHADERS \
- X(CudaDecode, "", "CUDA Decode") \
- X(CudaHilbert, "", "CUDA Hilbert") \
- X(DAS, "das", "DAS") \
- X(Decode, "decode", "Decode (I16)") \
- X(Filter, "filter", "Filter (F32C)") \
- X(Demodulate, "", "Demodulate (I16)") \
- X(MinMax, "min_max", "Min/Max") \
- X(Sum, "sum", "Sum")
-
-#define DECODE_SHADER_VARIATIONS \
- X(DecodeInt16Complex, "", "Decode (I16C)", " (I16)") \
- X(DecodeFloat, "", "Decode (F32)", " (F32)") \
- X(DecodeFloatComplex, "", "Decode (F32C)", " (F32C)") \
- X(DecodeInt16ToFloat, "", "Decode (I16-F32)", " (I16-F32)")
-
-#define FILTER_SHADER_VARIATIONS \
- X(FilterCF, "", "Filter (F32C-CF)", " (F32C-CF)") \
- X(DemodulateCF, "", "Demodulate (I16-CF)", " (I16-CF)") \
- X(DemodulateFloat, "", "Demodulate (F32)", " (F32)") \
- X(DemodulateFloatCF, "", "Demodulate (F32-CF)", " (F32-CF)")
-
-#define COMPUTE_SHADERS_INTERNAL \
- COMPUTE_SHADERS \
- DECODE_SHADER_VARIATIONS \
- FILTER_SHADER_VARIATIONS \
- X(DASFast, "", "DAS (Fast)")
-
-typedef enum {
- #define X(e, ...) BeamformerShaderKind_##e,
- COMPUTE_SHADERS_INTERNAL
- #undef X
- BeamformerShaderKind_Render3D,
- BeamformerShaderKind_Count,
-
- BeamformerShaderKind_ComputeCount = BeamformerShaderKind_Render3D,
-} BeamformerShaderKind;
-
typedef struct {
/* NOTE(rnp): this wants to be iterated on both dimensions. it depends entirely on which
* visualization method you want to use. the coalescing function wants both directions */
- float times[32][BeamformerShaderKind_Count];
+ float times[32][BeamformerShaderKind_ComputeCount];
float rf_time_deltas[32];
} BeamformerComputeStatsTable;
@@ -61,11 +22,6 @@ typedef struct {
X(NONE, 0, "None") \
X(HADAMARD, 1, "Hadamard")
-#define SAMPLING_MODES_LIST \
- X(NS200BW, 0) \
- X(BS100BW, 1) \
- X(BS50BW, 2)
-
#define TRANSMIT_MODES_LIST \
X(Rows) \
X(Columns)
@@ -82,20 +38,6 @@ typedef enum {TRANSMIT_MODES_LIST} BeamformerTransmitModes;
typedef enum {RECEIVE_MODES_LIST} BeamformerReceiveModes;
#undef X
-#define X(k, v, ...) BeamformerSamplingMode_## k = v,
-typedef enum {SAMPLING_MODES_LIST BeamformerSamplingMode_Count} BeamformerSamplingModes;
-#undef X
-
-#define BEAMFORMER_DATA_KIND_LIST \
- X(Int16, 0) \
- X(Int16Complex, 1) \
- X(Float32, 2) \
- X(Float32Complex, 3)
-
-#define X(k, id) BeamformerDataKind_##k = id,
-typedef enum {BEAMFORMER_DATA_KIND_LIST} BeamformerDataKind;
-#undef X
-
/* TODO(rnp): this is an absolute abuse of the preprocessor, but now is
* not a good time to write a full metaprogram */
#define BEAMFORMER_FILTER_KIND_LIST(type, _) \
@@ -148,10 +90,6 @@ typedef enum {
#define DAS_LOCAL_SIZE_Y 1
#define DAS_LOCAL_SIZE_Z 16
-#define DAS_FAST_LOCAL_SIZE_X 16
-#define DAS_FAST_LOCAL_SIZE_Y 1
-#define DAS_FAST_LOCAL_SIZE_Z 16
-
#define DAS_VOXEL_OFFSET_UNIFORM_LOC 2
#define DAS_CYCLE_T_UNIFORM_LOC 3
#define DAS_FAST_CHANNEL_UNIFORM_LOC 4
diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c
@@ -116,6 +116,7 @@ typedef union {
typedef struct {
BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages];
BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages];
+ u32 program_indices[BeamformerMaxComputeShaderStages];
u32 shader_count;
BeamformerDataKind data_kind;
} BeamformerComputePipeline;
diff --git a/build.c b/build.c
@@ -2,6 +2,10 @@
/* NOTE: inspired by nob: https://github.com/tsoding/nob.h */
/* TODO(rnp):
+ * [ ]: refactor: "base" shaders should only be reloadable shaders
+ * - internally when a shader with no file is encountered it should
+ * not get pushed as a "base" shader.
+ * [ ]: bug: column indicator for compile error is off
* [ ]: bake shaders and font data into binary
* - for shaders there is a way of making a separate data section and referring
* to it with extern from the C source (bake both data and size)
@@ -11,10 +15,12 @@
* [ ]: seperate dwarf debug info
*/
#include <stdarg.h>
+#include <setjmp.h>
#include <stdio.h>
#include "util.h"
+#define BeamformerShaderKind_ComputeCount (1)
#include "beamformer_parameters.h"
global char *g_argv0;
@@ -105,21 +111,11 @@ global char *g_argv0;
#define shift(list, count) ((count)--, *(list)++)
-#define da_append_count(a, s, items, item_count) do { \
- da_reserve((a), (s), (s)->count + (item_count)); \
- mem_copy((s)->data + (s)->count, (items), sizeof(*(items)) * (uz)(item_count)); \
- (s)->count += (item_count); \
-} while (0)
-
#define cmd_append_count da_append_count
#define cmd_append(a, s, ...) da_append_count(a, s, ((char *[]){__VA_ARGS__}), \
(iz)(sizeof((char *[]){__VA_ARGS__}) / sizeof(char *)))
-typedef struct {
- char **data;
- iz count;
- iz capacity;
-} CommandList;
+DA_STRUCT(char *, Command);
typedef struct {
b32 debug;
@@ -130,10 +126,11 @@ typedef struct {
} Options;
#define BUILD_LOG_KINDS \
- X(Error, "\x1B[31m[ERROR]\x1B[0m ") \
- X(Warning, "\x1B[33m[WARNING]\x1B[0m ") \
- X(Info, "\x1B[32m[INFO]\x1B[0m ") \
- X(Command, "\x1B[36m[COMMAND]\x1B[0m ")
+ X(Error, "\x1B[31m[ERROR]\x1B[0m ") \
+ X(Warning, "\x1B[33m[WARNING]\x1B[0m ") \
+ X(Generate, "\x1B[32m[GENERATE]\x1B[0m ") \
+ X(Info, "\x1B[33m[INFO]\x1B[0m ") \
+ X(Command, "\x1B[36m[COMMAND]\x1B[0m ")
#define X(t, ...) BuildLogKind_##t,
typedef enum {BUILD_LOG_KINDS BuildLogKind_Count} BuildLogKind;
#undef X
@@ -152,9 +149,10 @@ build_log_base(BuildLogKind kind, char *format, va_list args)
#define build_log_failure(format, ...) build_log(BuildLogKind_Error, \
"failed to build: " format, ##__VA_ARGS__)
-#define build_log_info(...) build_log(BuildLogKind_Info, ##__VA_ARGS__)
-#define build_log_command(...) build_log(BuildLogKind_Command, ##__VA_ARGS__)
-#define build_log_warning(...) build_log(BuildLogKind_Warning, ##__VA_ARGS__)
+#define build_log_generate(...) build_log(BuildLogKind_Generate, ##__VA_ARGS__)
+#define build_log_info(...) build_log(BuildLogKind_Info, ##__VA_ARGS__)
+#define build_log_command(...) build_log(BuildLogKind_Command, ##__VA_ARGS__)
+#define build_log_warning(...) build_log(BuildLogKind_Warning, ##__VA_ARGS__)
function void
build_log(BuildLogKind kind, char *format, ...)
{
@@ -271,7 +269,7 @@ os_wait_close_process(iptr handle)
}
} else {
/* TODO(rnp): handle multiple children */
- INVALID_CODE_PATH;
+ InvalidCodePath;
}
}
return result;
@@ -661,23 +659,6 @@ build_helper_library(Arena arena, CommandList cc)
b32 result = build_shared_library(arena, cc, "ogl_beamformer_lib", library,
libs, libs_count,
arg_list(char *, "helpers/ogl_beamformer_lib.c"));
-
- /////////////
- // header
- char *lib_header_out = OUTPUT("ogl_beamformer_lib.h");
- if (needs_rebuild(lib_header_out, "helpers/ogl_beamformer_lib_base.h")) {
- s8 parameters_header = os_read_whole_file(&arena, "beamformer_parameters.h");
- s8 base_header = os_read_whole_file(&arena, "helpers/ogl_beamformer_lib_base.h");
- result = parameters_header.len != 0 && base_header.len != 0 &&
- parameters_header.data + parameters_header.len == base_header.data;
- if (result) {
- s8 output_file = parameters_header;
- output_file.len += base_header.len;
- result &= os_write_new_file(lib_header_out, output_file);
- }
- if (!result) build_log_failure("%s", lib_header_out);
- }
-
return result;
}
@@ -723,6 +704,16 @@ typedef struct {
iz capacity;
} s8_list;
+/* NOTE: splits the leading `count` bytes off of `in` and returns them. `count` is
+ * clamped to [0, in->len] and `in` is advanced past the returned bytes. */
+function s8
+s8_chop(s8 *in, iz count)
+{
+	iz taken = CLAMP(count, 0, in->len);
+
+	s8 head   = {0};
+	head.data = in->data;
+	head.len  = taken;
+
+	in->len  -= taken;
+	in->data += taken;
+	return head;
+}
+
function void
s8_split(s8 str, s8 *left, s8 *right, u8 byte)
{
@@ -759,6 +750,7 @@ s8_list_from_s8(s8_list *list, Arena *arena, s8 str)
typedef struct {
Stream stream;
+ Arena scratch;
i32 indentation_level;
} MetaprogramContext;
@@ -772,13 +764,6 @@ meta_write_and_reset(MetaprogramContext *m, char *file)
return result;
}
-function void
-meta_indent(MetaprogramContext *m)
-{
- for (i32 count = m->indentation_level; count > 0; count--)
- stream_append_byte(&m->stream, '\t');
-}
-
#define meta_push(m, ...) meta_push_(m, arg_list(s8, __VA_ARGS__))
function void
meta_push_(MetaprogramContext *m, s8 *items, iz count)
@@ -786,11 +771,15 @@ meta_push_(MetaprogramContext *m, s8 *items, iz count)
stream_append_s8s_(&m->stream, items, count);
}
+/* NOTE: meta_pad forwards byte `b` and count `n` to stream_pad() on the context's stream
+ * (presumably appending `n` copies of `b` - confirm against stream_pad);
+ * meta_indent pads with '\t' using the current indentation level as the count. */
+#define meta_pad(m, b, n) stream_pad(&(m)->stream, (b), (n))
+#define meta_indent(m) meta_pad((m), '\t', (m)->indentation_level)
#define meta_begin_line(m, ...) do { meta_indent(m); meta_push(m, __VA_ARGS__); } while(0)
-#define meta_end_line(m, ...) do { meta_push(m, __VA_ARGS__, s8("\n")); } while(0)
+#define meta_end_line(m, ...) meta_push(m, __VA_ARGS__, s8("\n"))
#define meta_push_line(m, ...) do { meta_indent(m); meta_push(m, __VA_ARGS__, s8("\n")); } while(0)
#define meta_begin_scope(m, ...) do { meta_push_line(m, __VA_ARGS__); (m)->indentation_level++; } while(0)
#define meta_end_scope(m, ...) do { (m)->indentation_level--; meta_push_line(m, __VA_ARGS__); } while(0)
+#define meta_push_u64(m, n) stream_append_u64(&(m)->stream, (n))
+#define meta_push_u64_hex(m, n) stream_append_hex_u64(&(m)->stream, (n))
#define meta_begin_matlab_class_cracker(_1, _2, FN, ...) FN
#define meta_begin_matlab_class_1(m, name) meta_begin_scope(m, s8("classdef " name))
@@ -828,135 +817,1613 @@ meta_end_and_write_matlab(MetaprogramContext *m, char *path)
return result;
}
-function b32
-build_matlab_bindings(Arena arena)
+/* NOTE: X-macro listing every entry kind. It is expanded twice below: once into the
+ * MetaEntryKind enum and once into meta_entry_kind_strings, so the string table is
+ * indexed directly by the enum value. */
+#define META_ENTRY_KIND_LIST \
+ X(BeginScope) \
+ X(EndScope) \
+ X(Permute) \
+ X(PermuteBits) \
+ X(Shader) \
+ X(ShaderGroup) \
+ X(SubShader)
+
+#define X(k, ...) MetaEntryKind_## k,
+typedef enum {META_ENTRY_KIND_LIST} MetaEntryKind;
+#undef X
+
+/* NOTE: printable name for each MetaEntryKind, in enum order */
+#define X(k, ...) #k,
+read_only global char *meta_entry_kind_strings[] = {META_ENTRY_KIND_LIST};
+#undef X
+
+/* NOTE: position inside the file being parsed. Both fields are stored zero based;
+ * meta_compiler_error_message adds 1 to each when printing. */
+typedef struct { u32 line, column; } MetaLocation;
+
+/* NOTE: X-macro of the argument kinds an entry can carry: a single string or an
+ * array of strings. None is value 0, i.e. what a zeroed MetaEntryArgument holds. */
+#define META_ENTRY_ARGUMENT_KIND_LIST \
+ X(None) \
+ X(String) \
+ X(Array)
+
+#define X(k, ...) MetaEntryArgumentKind_## k,
+typedef enum {META_ENTRY_ARGUMENT_KIND_LIST} MetaEntryArgumentKind;
+#undef X
+
+/* NOTE: one argument of an entry. `kind` selects the active union member:
+ * `string` for MetaEntryArgumentKind_String, `strings`/`count` for
+ * MetaEntryArgumentKind_Array. `location` is the parser location recorded by
+ * meta_parser_arguments when the argument was read. */
+typedef struct {
+ MetaEntryArgumentKind kind;
+ MetaLocation location;
+ union {
+ s8 string;
+ struct {
+ s8 *strings;
+ u64 count;
+ };
+ };
+} MetaEntryArgument;
+
+/* NOTE: one parsed entry: `@Kind(arguments...) name`, or a bare `{` / `}`.
+ * `arguments` points at `argument_count` arguments; `name` is the optional string
+ * following the entry (zero length when absent); `location` is where the entry
+ * started in the file. */
+typedef struct {
+ MetaEntryKind kind;
+ u32 argument_count;
+ MetaEntryArgument *arguments;
+ s8 name;
+ MetaLocation location;
+} MetaEntry;
+
+/* NOTE: dynamic array of entries in file order. `raw` is the whole file text as
+ * returned by os_read_whole_file in meta_entry_stack_from_file. */
+typedef struct {
+ MetaEntry *data;
+ iz count;
+ iz capacity;
+ s8 raw;
+} MetaEntryStack;
+
+/* NOTE: X-macro pairing each single character token with its enum name. It is used
+ * to build the MetaParseToken enum, the switch in meta_parser_token, the string
+ * terminators in meta_parser_extract_string, and the names in meta_parser_token_name. */
+#define META_PARSE_TOKEN_LIST \
+ X('@', Entry) \
+ X('(', BeginArgs) \
+ X(')', EndArgs) \
+ X('[', BeginArray) \
+ X(']', EndArray) \
+ X('{', BeginScope) \
+ X('}', EndScope)
+
+/* NOTE: EOF and String are not single character tokens so they are listed by hand */
+typedef enum {
+ MetaParseToken_EOF,
+ MetaParseToken_String,
+ #define X(__1, kind, ...) MetaParseToken_## kind,
+ META_PARSE_TOKEN_LIST
+ #undef X
+ MetaParseToken_Count,
+} MetaParseToken;
+
+/* NOTE: payload of the most recent token: `string` for MetaParseToken_String,
+ * `kind` for Entry/BeginScope/EndScope tokens (see meta_parser_token) */
+typedef union {
+ MetaEntryKind kind;
+ s8 string;
+} MetaParseUnion;
+
+/* NOTE: a position in the input: the unparsed remainder `s` and its location */
+typedef struct {
+ s8 s;
+ MetaLocation location;
+} MetaParsePoint;
+
+/* NOTE: parser state. `p` is the current position, `u` the payload of the last
+ * token, and `save_point` the position recorded by meta_parser_save. */
+typedef struct {
+ MetaParsePoint p;
+ MetaParseUnion u;
+ MetaParsePoint save_point;
+} MetaParser;
+
+/* NOTE: compiler_file is the file name printed in error messages; it is set by
+ * meta_entry_stack_from_file. compiler_jmp_buf is the longjmp target used by
+ * meta_error; the matching setjmp is not visible in this section. */
+global char *compiler_file;
+global jmp_buf compiler_jmp_buf;
+
+/* NOTE: save copies the current position into save_point. restore exchanges the
+ * current position and save_point (assuming swap() exchanges its two operands -
+ * it is defined elsewhere), so after a restore the save_point holds the position
+ * that was just abandoned. commit is the same exchange: calling it after a
+ * restore (e.g. after meta_parser_peek_token) returns to the post-token position. */
+#define meta_parser_save(v) (v)->save_point = (v)->p
+#define meta_parser_restore(v) swap((v)->p, (v)->save_point)
+#define meta_parser_commit(v) meta_parser_restore(v)
+
+/* NOTE: prints "<file>:<line>:<column>: error: " followed by the formatted message
+ * to stderr. `loc` is a MetaLocation; its zero based fields are printed one based.
+ * `loc` is parenthesized so that any expression can be passed. */
+#define meta_compiler_error_message(loc, format, ...) \
+	fprintf(stderr, "%s:%u:%u: error: "format, compiler_file, \
+	        (loc).line + 1, (loc).column + 1, ##__VA_ARGS__)
+
+/* NOTE: print an error for location `loc` and abort through meta_error() */
+#define meta_compiler_error(loc, format, ...) do { \
+ meta_compiler_error_message(loc, format, ##__VA_ARGS__); \
+ meta_error(); \
+} while (0)
+
+/* NOTE: the entry variants also print the offending entry with meta_entry_print
+ * at depth 1; they differ only in which column is passed as the caret position:
+ * the entry's own column, an explicit column, or the column of `loc`. */
+#define meta_entry_error(e, ...) meta_entry_error_column((e), (i32)(e)->location.column, __VA_ARGS__)
+#define meta_entry_error_column(e, column, ...) do { \
+ meta_compiler_error_message((e)->location, __VA_ARGS__); \
+ meta_entry_print((e), 1, (column)); \
+ meta_error(); \
+} while(0)
+
+#define meta_entry_error_location(e, loc, ...) do { \
+ meta_compiler_error_message((loc), __VA_ARGS__); \
+ meta_entry_print((e), 1, (i32)(loc).column); \
+ meta_error(); \
+} while (0)
+
+/* NOTE: common exit for all meta compile errors; never returns. assert(0) is hit
+ * first, then control is transferred to compiler_jmp_buf with value 1.
+ * NOTE(review): the assert presumably exists to stop in a debugger at the failure
+ * site - confirm it is compiled out in normal builds, otherwise the longjmp is
+ * never reached. */
+function no_return void
+meta_error(void)
{
- b32 result = 1;
- os_make_directory(OUTPUT("matlab"));
+ assert(0);
+ longjmp(compiler_jmp_buf, 1);
+}
- Arena scratch = sub_arena(&arena, MB(1), 16);
+/* NOTE: prints entry `e` to stderr as `@Kind(arg [a b ...]) name`, indented by two
+ * spaces per `depth`. Scope entries print "{" or "}" in place of the kind name.
+ * When `caret` is non-negative a second line with a '^' marker is printed,
+ * indented by the same two spaces per depth plus `caret` columns. */
+function void
+meta_entry_print(MetaEntry *e, i32 depth, i32 caret)
+{
+	char *kind = meta_entry_kind_strings[e->kind];
+	if (e->kind == MetaEntryKind_BeginScope) kind = "{";
+	if (e->kind == MetaEntryKind_EndScope)   kind = "}";
+
+	fprintf(stderr, "%*s@%s", depth * 2, "", kind);
+
+	if (e->argument_count) {
+		fprintf(stderr, "(");
+		for (u32 i = 0; i < e->argument_count; i++) {
+			MetaEntryArgument *a = e->arguments + i;
+			if (i != 0) fprintf(stderr, " ");
+			if (a->kind == MetaEntryArgumentKind_Array) {
+				fprintf(stderr, "[");
+				for (u64 j = 0; j < a->count; j++) {
+					if (j != 0) fprintf(stderr, " ");
+					fprintf(stderr, "%.*s", (i32)a->strings[j].len, a->strings[j].data);
+				}
+				fprintf(stderr, "]");
+			} else {
+				fprintf(stderr, "%.*s", (i32)a->string.len, a->string.data);
+			}
+		}
+		fprintf(stderr, ")");
+	}
+	if (e->name.len) fprintf(stderr, " %.*s", (i32)e->name.len, e->name.data);
- char *out = OUTPUT("matlab/OGLBeamformerLiveFeedbackFlags.m");
- if (needs_rebuild(out, "beamformer_parameters.h")) {
- /* TODO(rnp): recreate/clear directory incase these file names change */
- MetaprogramContext m = {.stream = arena_stream(arena)};
+	/* NOTE: the pad amount must be passed as a field width ("%*s"). As a precision
+	 * ("%.*s") it only limits how much of the empty string is printed, which emits
+	 * nothing and leaves the caret in column 0.
+	 * NOTE(review): `caret` is a column in the source file while the line above is a
+	 * reconstruction of the entry, so the marker may still not line up exactly. */
+	if (caret >= 0) fprintf(stderr, "\n%*s^", depth * 2 + caret, "");
- #define X(name, flag, ...) meta_push_line(&m, s8(#name " (" str(flag) ")"));
- meta_begin_matlab_class(&m, "OGLBeamformerLiveFeedbackFlags", "int32");
- meta_begin_scope(&m, s8("enumeration"));
- BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST
- result &= meta_end_and_write_matlab(&m, out);
+	fprintf(stderr, "\n");
+}
- meta_begin_matlab_class(&m, "OGLBeamformerDataKind", "int32");
- meta_begin_scope(&m, s8("enumeration"));
- BEAMFORMER_DATA_KIND_LIST
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerDataKind.m"));
- #undef X
+/* NOTE: maps the text following '@' to its MetaEntryKind by comparing against the
+ * names generated from META_ENTRY_KIND_LIST. When nothing matches the value -1
+ * converted to MetaEntryKind is returned. */
+function MetaEntryKind
+meta_entry_kind_from_string(s8 s)
+{
+	#define X(k, ...) s8_comp(#k),
+	read_only local_persist s8 kinds[] = {META_ENTRY_KIND_LIST};
+	#undef X
+
+	for (iz index = 0; index < (iz)countof(kinds); index++) {
+		if (s8_equal(kinds[index], s))
+			return (MetaEntryKind)(i32)index;
+	}
+
+	i32 not_found = -1;
+	return (MetaEntryKind)not_found;
+}
- #define X(kind, ...) meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerFilterKind_## kind);
- meta_begin_matlab_class(&m, "OGLBeamformerFilterKind", "int32");
- meta_begin_scope(&m, s8("enumeration"));
- BEAMFORMER_FILTER_KIND_LIST(,)
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerFilterKind.m"));
- #undef X
+/* NOTE: advances the parser past whitespace and `//` line comments, keeping the
+ * line/column location up to date. Stops at the first byte that is neither
+ * whitespace nor part of a comment, or at the end of input. */
+function void
+meta_parser_trim(MetaParser *p)
+{
+	u8 *s, *end = p->p.s.data + p->p.s.len;
+	b32 done    = 0;
+	b32 comment = 0;
+	for (s = p->p.s.data; !done && s != end;) {
+		switch (*s) {
+		case '\r': case '\t': case ' ':
+		{
+			p->p.location.column++;
+		}break;
+		/* NOTE: a comment always ends at the end of the line */
+		case '\n':{ p->p.location.line++; p->p.location.column = 0; comment = 0; }break;
+		case '/':{
+			/* NOTE: only test for "//" when outside of a comment. Re-testing inside
+			 * of one would look at the second '/' of the opener (or any later '/')
+			 * and, unless it is again followed by '/', end the comment early. */
+			if (!comment) comment = ((s + 1) != end && s[1] == '/');
+		} /* FALLTHROUGH */
+		default:{done = !comment;}break;
+		}
+		if (!done) s++;
+	}
+	p->p.s.data = s;
+	p->p.s.len  = end - s;
+}
- #define X(kind, ...) meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerShaderKind_## kind);
- meta_begin_matlab_class(&m, "OGLBeamformerShaderStage", "int32");
- meta_begin_scope(&m, s8("enumeration"));
- COMPUTE_SHADERS
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerShaderStage.m"));
+/* NOTE: consumes and returns the run of bytes at the current position up to (not
+ * including) the first token character, whitespace byte, or "//" pair. The
+ * result may be empty. The column is advanced by the number of bytes consumed. */
+function s8
+meta_parser_extract_string(MetaParser *p)
+{
+	u8 *text   = p->p.s.data;
+	iz  limit  = p->p.s.len;
+	iz  length = 0;
+	b32 stop   = 0;
+	while (!stop && length < limit) {
+		switch (text[length]) {
+		#define X(t, ...) case t:
+		META_PARSE_TOKEN_LIST
#undef X
+		case ' ': case '\n': case '\r': case '\t':
+		{ stop = 1; }break;
+		case '/':{
+			/* NOTE: a lone '/' is part of the string, "//" starts a comment */
+			stop = (length + 1 < limit) && (text[length + 1] == '/');
+		}break;
+		default:{}break;
+		}
+		if (!stop) length++;
+	}
+
+	s8 result = {.data = text, .len = length};
+	p->p.location.column += (u32)length;
+	p->p.s.data          += length;
+	p->p.s.len           -= length;
+	return result;
+}
- #define X(kind, ...) meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerTransmitMode_## kind);
- meta_begin_matlab_class(&m, "OGLBeamformerTransmitModes", "int32");
- meta_begin_scope(&m, s8("enumeration"));
- TRANSMIT_MODES_LIST
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerTransmitModes.m"));
+/* NOTE: printable name of token `t` for error messages: the text of the last parsed
+ * string for string tokens, the quoted character for single character tokens,
+ * "EOF" for the end of input, and "invalid" for anything out of range. */
+function s8
+meta_parser_token_name(MetaParser *p, MetaParseToken t)
+{
+	read_only local_persist s8 token_names[MetaParseToken_Count] = {
+		[MetaParseToken_EOF] = s8_comp("\"EOF\""),
+		#define X(k, v, ...) [MetaParseToken_## v] = s8_comp(#k),
+		META_PARSE_TOKEN_LIST
#undef X
+	};
+
+	s8 name = s8("\"invalid\"");
+	if (t == MetaParseToken_String) {
+		name = p->u.string;
+	} else if (t >= 0 && t < countof(token_names)) {
+		name = token_names[t];
+	}
+	return name;
+}
- #define X(kind, ...) meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerReceiveMode_## kind);
- meta_begin_matlab_class(&m, "OGLBeamformerReceiveModes", "int32");
- meta_begin_scope(&m, s8("enumeration"));
- RECEIVE_MODES_LIST
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerReceiveModes.m"));
+/* NOTE: reads the next token. The position before the token is stored in the
+ * parser's save_point so the caller can undo the read (see meta_parser_peek_token).
+ * Single character tokens are consumed here; for strings and '@' entries the
+ * text is extracted and stored in p->u. Trailing whitespace and comments are
+ * skipped. Returns MetaParseToken_EOF when no input is left. An unknown '@' kind
+ * is reported as a compile error. */
+function MetaParseToken
+meta_parser_token(MetaParser *p)
+{
+	MetaParseToken result = MetaParseToken_EOF;
+	meta_parser_save(p);
+	if (p->p.s.len > 0) {
+		b32 chop = 1;
+		switch (p->p.s.data[0]) {
+		#define X(t, kind, ...) case t:{ result = MetaParseToken_## kind; }break;
+		META_PARSE_TOKEN_LIST
#undef X
+		default:{ result = MetaParseToken_String; chop = 0; }break;
+		}
+		if (chop) { s8_chop(&p->p.s, 1); p->p.location.column++; }
- #define X(kind, v, ...) meta_push_line(&m, s8(#kind " (" #v ")"));
- meta_begin_matlab_class(&m, "OGLBeamformerSamplingModes", "int32");
- meta_begin_scope(&m, s8("enumeration"));
- SAMPLING_MODES_LIST
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerSamplingModes.m"));
- #undef X
+		meta_parser_trim(p);
+		switch (result) {
+		case MetaParseToken_String:{ p->u.string = meta_parser_extract_string(p); }break;
+
+		/* NOTE(rnp): '{' and '}' are shorthand for @BeginScope and @EndScope */
+		case MetaParseToken_BeginScope:{ p->u.kind = MetaEntryKind_BeginScope; }break;
+		case MetaParseToken_EndScope:{   p->u.kind = MetaEntryKind_EndScope;   }break;
+
+		case MetaParseToken_Entry:{
+			s8 kind = meta_parser_extract_string(p);
+			p->u.kind = meta_entry_kind_from_string(kind);
+			/* NOTE: meta_entry_kind_from_string signals "not found" with -1. The
+			 * enum has no negative enumerators so its underlying type may be
+			 * unsigned, in which case `kind < 0` can never be true; compare
+			 * as a signed value instead. */
+			if ((i32)p->u.kind < 0) {
+				meta_compiler_error(p->p.location, "invalid meta kind: %.*s\n", (i32)kind.len, kind.data);
+			}
+		}break;
+		default:{}break;
+		}
+		meta_parser_trim(p);
+	}
+
+	return result;
+}
+
+/* NOTE: returns the next token without consuming it. meta_parser_token stored the
+ * pre-token position in save_point; restoring puts the parser back there while
+ * p->u keeps the token's payload. A following meta_parser_commit re-consumes the
+ * token without parsing it again. */
+function MetaParseToken
+meta_parser_peek_token(MetaParser *p)
+{
+ MetaParseToken result = meta_parser_token(p);
+ meta_parser_restore(p);
+ return result;
+}
+
+/* NOTE: reports token `t` as unexpected and aborts through meta_compiler_error.
+ * The parser is first moved back to the saved pre-token position so that the
+ * reported location is that position rather than the one after the token. */
+function void
+meta_parser_unexpected_token(MetaParser *p, MetaParseToken t)
+{
+ meta_parser_restore(p);
+ s8 token_name = meta_parser_token_name(p, t);
+ meta_compiler_error(p->p.location, "unexpected token: %.*s\n", (i32)token_name.len, token_name.data);
+}
+
+/* NOTE: parses an optional `( ... )` argument list for entry `e`. Arguments are
+ * either bare strings or `[a b ...]` arrays of strings. Both the argument structs
+ * and the array elements are allocated from `arena`, and e->arguments points at
+ * the first argument struct; later arguments are reached by indexing from it.
+ * Any token other than a string, '[' or ']' before the closing ')' (including
+ * the end of input) is reported as unexpected.
+ * NOTE(review): array elements are pushed between argument structs, so an argument
+ * that follows an array does not appear to be contiguous with e->arguments.
+ * Arrays as the last argument are fine - confirm before using other layouts. */
+function void
+meta_parser_arguments(MetaParser *p, MetaEntry *e, Arena *arena)
+{
+ if (meta_parser_peek_token(p) == MetaParseToken_BeginArgs) {
+ meta_parser_commit(p);
+
+ MetaEntryArgument *arg = e->arguments = push_struct(arena, MetaEntryArgument);
+ b32 array = 0;
+ for (MetaParseToken token = meta_parser_token(p);
+ token != MetaParseToken_EndArgs;
+ token = meta_parser_token(p))
+ {
+ /* NOTE: `arg` is cleared whenever an argument is completed */
+ if (!arg) arg = push_struct(arena, MetaEntryArgument);
+ switch (token) {
+ case MetaParseToken_String:{
+ if (array) {
+ /* NOTE: array elements must land directly after the previous one */
+ assert((u8 *)(arg->strings + arg->count) == arena->beg);
+ *push_struct(arena, s8) = p->u.string;
+ arg->count++;
+ } else {
+ e->argument_count++;
+ arg->kind = MetaEntryArgumentKind_String;
+ arg->string = p->u.string;
+ arg->location = p->p.location;
+ arg = 0;
+ }
+ }break;
+ case MetaParseToken_BeginArray:{
+ arg->kind = MetaEntryArgumentKind_Array;
+ /* NOTE: elements are pushed starting at the arena's next s8 aligned address */
+ arg->strings = (s8 *)arena_aligned_start(*arena, alignof(s8));
+ arg->location = p->p.location;
+ array = 1;
+ }break;
+ case MetaParseToken_EndArray:{
+ e->argument_count++;
+ array = 0;
+ arg = 0;
+ }break;
+ default:{ meta_parser_unexpected_token(p, token); }break;
+ }
+ }
+ }
+}
+
+/* NOTE: reads `file` into `arena` and parses it into a flat list of entries in
+ * file order. Each entry is a '{', a '}' or an '@Kind', optionally followed by
+ * arguments (only for '@' entries) and a name string. Any other token at the
+ * top level is reported as unexpected. Also sets compiler_file for error
+ * messages. `scratch` is not used in this function. */
+function MetaEntryStack
+meta_entry_stack_from_file(Arena *arena, Arena scratch, char *file)
+{
+ MetaParser parser = {.p.s = os_read_whole_file(arena, file)};
+ MetaEntryStack result = {.raw = parser.p.s};
+
+ compiler_file = file;
+
+ meta_parser_trim(&parser);
+
+ for (MetaParseToken token = meta_parser_token(&parser);
+ token != MetaParseToken_EOF;
+ token = meta_parser_token(&parser))
+ {
+ MetaEntry *e = da_push(arena, &result);
+ switch (token) {
+ case MetaParseToken_BeginScope:
+ case MetaParseToken_EndScope:
+ case MetaParseToken_Entry:
+ {
+ e->kind = parser.u.kind;
+ /* NOTE: save_point holds the position before the token just read */
+ e->location = parser.save_point.location;
+
+ if (token == MetaParseToken_Entry)
+ meta_parser_arguments(&parser, e, arena);
+
+ if (meta_parser_peek_token(&parser) == MetaParseToken_String) {
+ meta_parser_commit(&parser);
+ e->name = parser.u.string;
+ }
+ }break;
+
+ default:{ meta_parser_unexpected_token(&parser, token); }break;
+ }
+ }
+
+ return result;
+}
+
+/* NOTE: verifies that entry `e` has exactly as many arguments as descriptions were
+ * passed. On mismatch an error with the expected call format (built from the
+ * descriptions) is printed and compilation is aborted through meta_error(). */
+#define meta_entry_argument_expected(e, ...) \
+	meta_entry_argument_expected_((e), arg_list(s8, __VA_ARGS__))
+function void
+meta_entry_argument_expected_(MetaEntry *e, s8 *args, uz count)
+{
+	if (e->argument_count == count)
+		return;
+
+	char *kind_name = meta_entry_kind_strings[e->kind];
+	meta_compiler_error_message(e->location, "incorrect argument count for entry %s() got: %u expected: %u\n",
+	                            kind_name, e->argument_count, (u32)count);
+
+	fprintf(stderr, " format: @%s(", kind_name);
+	uz index = 0;
+	while (index < count) {
+		if (index != 0) fprintf(stderr, ", ");
+		fprintf(stderr, "%.*s", (i32)args[index].len, args[index].data);
+		index++;
+	}
+	fprintf(stderr, ")\n");
+
+	meta_error();
+}
+
+/* NOTE: returns a copy of argument `index` of entry `e` after checking that it has
+ * the requested kind and, for arrays, at least one element. A failed check
+ * reports an error at the argument's location and aborts. The caller must have
+ * already verified the argument count. */
+function MetaEntryArgument
+meta_entry_argument_expect(MetaEntry *e, u32 index, MetaEntryArgumentKind kind)
+{
+	#define X(k, ...) #k,
+	read_only local_persist char *kind_names[] = {META_ENTRY_ARGUMENT_KIND_LIST};
+	#undef X
+
+	assert(e->argument_count > index);
+	MetaEntryArgument *argument = e->arguments + index;
+
+	b32 kind_matches = argument->kind == kind;
+	if (!kind_matches) {
+		meta_entry_error_location(e, argument->location, "unexpected argument kind: expected %s but got: %s\n",
+		                          kind_names[kind], kind_names[argument->kind]);
+	}
+
+	b32 empty_array = kind == MetaEntryArgumentKind_Array && argument->count == 0;
+	if (empty_array) {
+		meta_entry_error_location(e, argument->location, "array arguments must have at least 1 element\n");
+	}
+
+	MetaEntryArgument result = *argument;
+	return result;
+}
+
+/* NOTE: dynamic array of string lists; MetaContext uses it for the per-kind
+ * permutation names and the per-shader flag names */
+typedef struct {
+ s8_list *data;
+ iz count;
+ iz capacity;
+} s8_list_table;
+
+/* NOTE: result of meta_commit_permutation: `kind` indexes
+ * MetaContext.permutation_kinds and `variation` indexes
+ * MetaContext.permutations_for_kind.data[kind] */
+typedef struct {
+ iz kind;
+ iz variation;
+} MetaPermutation;
+
+/* NOTE: dynamic array of u32 ids */
+typedef struct {
+ u32 *data;
+ iz count;
+ iz capacity;
+} MetaIDList;
+
+/* NOTE: one generated variation of a shader, filled by meta_pack_shader_permutation.
+ * Each global flag packs a permutation as (kind index << 16) | variation index;
+ * each local flag is a bit mask built from indices into the shader's flag list. */
+typedef struct {
+ u32 *global_flags;
+ u8 *local_flags;
+ u8 global_flags_count;
+ u8 local_flags_count;
+} MetaShaderPermutation;
+DA_STRUCT(MetaShaderPermutation, MetaShaderPermutation);
+
+/* NOTE: a shader or sub shader and all of its permutations.
+ * global_flag_ids: permutation kind indices used by any of its permutations
+ * base_name_id: index into MetaContext.shader_names
+ * flag_list_id: index into MetaContext.flags_for_shader; sub shaders share the
+ * list of the shader they are declared in (see meta_pack_shader) */
+typedef struct {
+ MetaShaderPermutationList permutations;
+ MetaIDList global_flag_ids;
+ u32 base_name_id;
+ u32 flag_list_id;
+} MetaShader;
+DA_STRUCT(MetaShader, MetaShader);
+
+/* NOTE: a top level @Shader entry: the shader it produced, the ids (indices into
+ * MetaContext.shaders) of its sub shaders, and the optional file name argument.
+ * NOTE(review): `shader` points into the MetaContext.shaders dynamic array -
+ * confirm it stays valid if that array is moved by a later push. */
+typedef struct {
+ MetaShader *shader;
+ MetaIDList sub_shaders;
+ s8 file;
+} MetaBaseShader;
+DA_STRUCT(MetaBaseShader, MetaBaseShader);
+
+/* NOTE: per shader summary stored in MetaContext.shader_descriptors. Nothing in
+ * this section fills or reads it; see its users for field meaning. */
+typedef struct {
+ i32 first_match_vector_index;
+ i32 one_past_last_match_vector_index;
+ i32 sub_field_count;
+ b32 has_local_flags;
+} MetaShaderDescriptor;
+
+/* NOTE: a named group of shaders; `shaders` holds indices into MetaContext.shaders
+ * (sub shaders included, see meta_pack_shader) */
+typedef struct {
+ s8 name;
+ MetaIDList shaders;
+} MetaShaderGroup;
+DA_STRUCT(MetaShaderGroup, MetaShaderGroup);
+
+/* NOTE: everything collected while processing the entry list.
+ * permutation_kinds / permutations_for_kind: interned @Permute kind names and,
+ * at the same index, the interned variation names of that kind
+ * flags_for_shader: interned local flag names, one list per flag_list_id
+ * shader_names: interned shader names, indexed by base_name_id
+ * All allocations made through the meta_* helpers in this section use `arena`. */
+typedef struct {
+ Arena *arena, scratch;
+
+ s8_list permutation_kinds;
+ s8_list_table permutations_for_kind;
+
+ s8_list_table flags_for_shader;
+
+ MetaShaderGroupList shader_groups;
+ MetaShaderList shaders;
+ MetaBaseShaderList base_shaders;
+ s8_list shader_names;
+
+ MetaShaderDescriptor *shader_descriptors;
+} MetaContext;
+
+/* NOTE: linear search for `s` in `sv`; returns the index of the first equal string
+ * or -1 when there is none */
+function iz
+meta_lookup_string_slow(s8_list *sv, s8 s)
+{
+	// TODO(rnp): obviously this is slow
+	iz found = -1;
+	for (iz index = 0; found < 0 && index < sv->count; index++) {
+		if (s8_equal(s, sv->data[index]))
+			found = index;
+	}
+	return found;
+}
+
+/* NOTE: linear search for `id` in `v`; returns the index of the first match or -1
+ * when there is none */
+function iz
+meta_lookup_id_slow(MetaIDList *v, u32 id)
+{
+	// TODO(rnp): obviously this is slow
+	iz found = -1;
+	for (iz index = 0; found < 0 && index < v->count; index++) {
+		if (v->data[index] == id)
+			found = index;
+	}
+	return found;
+}
+
+/* NOTE: returns the index of `s` in `sv`, appending it (allocated from ctx->arena)
+ * when it is not present yet */
+function iz
+meta_intern_string(MetaContext *ctx, s8_list *sv, s8 s)
+{
+	iz index = meta_lookup_string_slow(sv, s);
+	if (index >= 0)
+		return index;
+
+	*da_push(ctx->arena, sv) = s;
+	return sv->count - 1;
+}
+
+/* NOTE: returns the index of `id` in `v`, appending it (allocated from ctx->arena)
+ * when it is not present yet */
+function iz
+meta_intern_id(MetaContext *ctx, MetaIDList *v, u32 id)
+{
+	iz index = meta_lookup_id_slow(v, id);
+	if (index >= 0)
+		return index;
+
+	*da_push(ctx->arena, v) = id;
+	return v->count - 1;
+}
+
+/* NOTE: interns the permutation kind and, within that kind, the variation name.
+ * A new kind also gets a new (empty) variation list so that both tables stay
+ * the same length. Returns the two indices. */
+function MetaPermutation
+meta_commit_permutation(MetaContext *ctx, s8 kind, s8 variation)
+{
+	iz kind_index = meta_intern_string(ctx, &ctx->permutation_kinds, kind);
+
+	b32 kind_is_new = ctx->permutation_kinds.count != ctx->permutations_for_kind.count;
+	if (kind_is_new) {
+		da_push(ctx->arena, &ctx->permutations_for_kind);
+		assert(kind_index == (ctx->permutations_for_kind.count - 1));
+	}
+
+	MetaPermutation result;
+	result.kind      = kind_index;
+	result.variation = meta_intern_string(ctx, ctx->permutations_for_kind.data + kind_index, variation);
+	return result;
+}
+
+/* NOTE: interns `base_name` in ctx->shader_names and returns its index as a u16.
+ * An index that does not fit in 16 bits is reported as a compile error at `loc`. */
+function u16
+meta_pack_shader_name(MetaContext *ctx, s8 base_name, MetaLocation loc)
+{
+	iz result = meta_intern_string(ctx, &ctx->shader_names, base_name);
+	if (result > (iz)U16_MAX) {
+		/* NOTE: the limit is passed as an explicit u32 so it matches the %u conversion */
+		meta_compiler_error(loc, "maximum base shaders exceeded: limit: %u\n", (u32)U16_MAX);
+	}
+	return (u16)result;
+}
+
+/* NOTE: interns `flag` in the flag list selected by `flag_list_id` and returns its
+ * index. Only indices 0-7 are allowed; anything above is reported as an error
+ * on entry `e`. */
+function u8
+meta_commit_shader_flag(MetaContext *ctx, u32 flag_list_id, s8 flag, MetaEntry *e)
+{
+	assert(flag_list_id < ctx->flags_for_shader.count);
+
+	s8_list *flags      = ctx->flags_for_shader.data + flag_list_id;
+	iz       flag_index = meta_intern_string(ctx, flags, flag);
+	if (flag_index > 7) meta_entry_error(e, "Shaders only support 8 local flags\n");
+
+	return (u8)flag_index;
+}
+
+/* NOTE: one open @Permute/@PermuteBits entry while permutations are being emitted.
+ * entry_id: index of the entry relative to the stack's base_entry
+ * cursor: the variation currently selected and one past the last one
+ * permutation_id: cached packed id for the current cursor; U32_MAX means it has
+ * not been computed yet (see meta_pack_shader_permutation) */
+typedef struct {
+ u16 entry_id;
+ struct {u8 current; u8 target;} cursor;
+ u32 permutation_id;
+} MetaShaderPermutationStackFrame;
+
+/* NOTE: dynamic array of frames, outermost first; entry ids are offsets from base_entry */
+typedef struct {
+ MetaEntry *base_entry;
+
+ MetaShaderPermutationStackFrame *data;
+ iz count;
+ iz capacity;
+} MetaShaderPermutationStack;
+
+/* NOTE: fills the flag arrays of permutation `sp`. The frames still on `stack` (the
+ * enclosing entries) contribute the variation selected by their current cursor;
+ * `last` (the entry that was just popped) contributes the variation selected by
+ * `frame_cursor`. Flag names, permutation kinds and variations are interned as a
+ * side effect. The arrays in `sp` must already be large enough for all frames
+ * plus `last`.
+ * NOTE(review): an enclosing @PermuteBits frame indexes its strings with the
+ * cursor directly while its cursor target is 2^count (see
+ * meta_emit_shader_permutations) - confirm nested @PermuteBits parents are
+ * handled as intended. */
+function void
+meta_pack_shader_permutation(MetaContext *ctx, MetaShaderPermutation *sp, MetaShader *base_shader,
+ u32 local_flags, MetaShaderPermutationStack *stack, MetaEntry *last,
+ u32 frame_cursor)
+{
+ ////////////////////////////////////
+ // NOTE: fill ids from up the stack
+ u32 local_flag_index = 0;
+ u32 global_flag_index = 0;
+ for (iz i = 0; i < stack->count; i++) {
+ MetaShaderPermutationStackFrame *f = stack->data + i;
+ MetaEntry *e = stack->base_entry + f->entry_id;
+ MetaEntryArgument *a = e->arguments;
+ u32 cursor = f->cursor.current;
+ switch (e->kind) {
+ case MetaEntryKind_PermuteBits:{
+ /* NOTE: the id is cached in the frame until its cursor moves */
+ if (f->permutation_id == U32_MAX)
+ f->permutation_id = meta_commit_shader_flag(ctx, base_shader->flag_list_id, a->strings[cursor], e);
+ sp->local_flags[local_flag_index++] = (u8)(1u << f->permutation_id);
+ }break;
+ case MetaEntryKind_Permute:{
+ if (f->permutation_id == U32_MAX) {
+ MetaPermutation p = meta_commit_permutation(ctx, a[0].string, a[1].strings[cursor]);
+ /* NOTE: kind in the upper 16 bits, variation in the lower 16 bits */
+ f->permutation_id = ((u32)(p.kind & 0xFFFFu) << 16) | (u32)(p.variation & 0xFFFFu);
+ meta_intern_id(ctx, &base_shader->global_flag_ids, (u32)p.kind);
+ }
+ sp->global_flags[global_flag_index++] = f->permutation_id;
+ }break;
+ InvalidDefaultCase;
+ }
+ }
+
+ ///////////////////////////////////
+ // NOTE: fill ids from stack frame
+ MetaEntryArgument *a = last->arguments;
+ switch (last->kind) {
+ case MetaEntryKind_PermuteBits:{
+ /* NOTE: here the cursor is a bit mask: bit n set selects strings[n]. The
+ * caller supplied local_flags are always included. */
+ u32 packed = local_flags;
+ u32 test = frame_cursor;
+ for EachBit(test, flag) {
+ u32 flag_index = meta_commit_shader_flag(ctx, base_shader->flag_list_id, a->strings[flag], last);
+ packed |= (1u << flag_index);
+ }
+ sp->local_flags[local_flag_index++] = (u8)packed;
+ }break;
+ case MetaEntryKind_Permute:{
+ MetaPermutation p = meta_commit_permutation(ctx, a[0].string, a[1].strings[frame_cursor]);
+ u32 packed = ((u32)(p.kind & 0xFFFFu) << 16) | (u32)(p.variation & 0xFFFFu);
+ sp->global_flags[global_flag_index++] = packed;
+ meta_intern_id(ctx, &base_shader->global_flag_ids, (u32)p.kind);
+ }break;
+ InvalidDefaultCase;
+ }
+}
+
+/* NOTE: pops the innermost frame off `stack` and appends one permutation to
+ * `base_shader` for every cursor value of that frame. Each permutation gets one
+ * local flag slot per @PermuteBits frame and one global flag slot per @Permute
+ * frame (the popped frame included); the slots are allocated from ctx->arena.
+ * The popped frame must not have been advanced yet. */
+function void
+meta_pop_and_pack_shader_permutations(MetaContext *ctx, MetaShader *base_shader, u32 local_flags,
+ MetaShaderPermutationStack *stack)
+{
+ assert(stack->count > 0);
+
+ u32 global_flag_count = 0;
+ u32 local_flag_count = 0;
+
+ /* NOTE: counted before the pop so the innermost frame is included */
+ for (iz i = 0; i < stack->count; i++) {
+ switch (stack->base_entry[stack->data[i].entry_id].kind) {
+ case MetaEntryKind_PermuteBits:{ local_flag_count++; }break;
+ case MetaEntryKind_Permute:{ global_flag_count++; }break;
+ InvalidDefaultCase;
+ }
+ }
+
+ MetaShaderPermutationStackFrame *f = stack->data + (--stack->count);
+ MetaEntry *last = stack->base_entry + f->entry_id;
+ assert(f->cursor.current == 0);
+ for (u32 cursor = 0; cursor < f->cursor.target; cursor++) {
+ MetaShaderPermutation *sp = da_push(ctx->arena, &base_shader->permutations);
+ sp->global_flags_count = (u8)global_flag_count;
+ sp->local_flags_count = (u8)local_flag_count;
+ sp->global_flags = push_array(ctx->arena, typeof(*sp->global_flags), global_flag_count);
+ sp->local_flags = push_array(ctx->arena, typeof(*sp->local_flags), local_flag_count);
+
+ meta_pack_shader_permutation(ctx, sp, base_shader, local_flags, stack, last, cursor);
+ }
+}
+
+/* NOTE: walks `entries` (a @Permute, @PermuteBits or @SubShader entry plus its
+ * nested scope) and appends every resulting permutation to shader `s`.
+ * Open permute entries are kept on a stack allocated from `scratch`. When a scope
+ * ends the innermost entry is expanded into permutations and popped; the walk
+ * then jumps back to the enclosing entry, which advances its cursor so the
+ * nested entries are visited again for its next variation. An enclosing entry
+ * whose cursor reaches its target is popped as well.
+ * @Permute contributes one variation per array element, @PermuteBits one per
+ * combination of its flags (2^count). */
+function void
+meta_emit_shader_permutations(MetaContext *ctx, Arena scratch, MetaShader *s, u32 local_flags,
+ MetaEntry *entries, iz entry_count)
+{
+ assert(entry_count > 0);
+ assert(entries[0].kind == MetaEntryKind_Permute ||
+ entries[0].kind == MetaEntryKind_PermuteBits ||
+ entries[0].kind == MetaEntryKind_SubShader);
+
+ MetaShaderPermutationStack stack = {.base_entry = entries};
+ da_reserve(&scratch, &stack, 32);
+
+ b32 done = 0;
+ for (iz i = 0; i < entry_count && !done; i++) {
+ MetaEntry *e = entries + i;
+ switch (e->kind) {
+ case MetaEntryKind_PermuteBits:
+ case MetaEntryKind_Permute:
+ {
+ if (stack.count && stack.data[stack.count - 1].entry_id == (u16)i) {
+ /* NOTE: revisiting the entry on top of the stack: select its next
+ * variation and invalidate the cached id */
+ MetaShaderPermutationStackFrame *f = stack.data + (stack.count - 1);
+ f->permutation_id = U32_MAX;
+ f->cursor.current++;
+ if (f->cursor.current == f->cursor.target) {
+ stack.count--;
+ done = stack.count == 0;
+ }
+ } else {
+ /* NOTE: first visit: validate the arguments and open a new frame */
+ u8 target;
+ if (e->kind == MetaEntryKind_Permute) {
+ meta_entry_argument_expected(e, s8("kind"), s8("[id ...]"));
+ target = (u8)meta_entry_argument_expect(e, 1, MetaEntryArgumentKind_Array).count;
+ } else {
+ meta_entry_argument_expected(e, s8("[id ...]"));
+ u32 count = (u32)meta_entry_argument_expect(e, 0, MetaEntryArgumentKind_Array).count;
+ /* NOTE: 2^count combinations; NOTE(review): truncated to u8, so 8
+ * flags would wrap to 0 - confirm this cannot occur */
+ target = (u8)(2u << (count - 1));
+ }
+ *da_push(&scratch, &stack) = (MetaShaderPermutationStackFrame){
+ .entry_id = (u16)i,
+ .permutation_id = U32_MAX,
+ .cursor.target = target,
+ };
+ }
+ }break;
+ case MetaEntryKind_SubShader:{}break;
+ case MetaEntryKind_BeginScope:{}break;
+ case MetaEntryKind_EndScope:{
+ meta_pop_and_pack_shader_permutations(ctx, s, local_flags, &stack);
+ /* NOTE: continue at the enclosing entry (the loop increment undoes the -1) */
+ if (stack.count != 0)
+ i = stack.data[stack.count - 1].entry_id - 1;
+ }break;
+ InvalidDefaultCase;
+ }
+ }
+ /* NOTE: a single entry without a scope never sees an EndScope */
+ if (stack.count) {
+ assert(stack.count == 1);
+ meta_pop_and_pack_shader_permutations(ctx, s, local_flags, &stack);
+ }
+}
+
+/* NOTE: processes one @Shader entry (entries[0]) together with its scope.
+ * Registers a base shader, a shader and a new flag list in `ctx`, adds the shader
+ * to group `sg`, and emits permutations for every @Permute/@PermuteBits/@SubShader
+ * entry directly inside of the shader's scope. A @SubShader becomes its own
+ * shader that shares the parent's flag list and always has the flag named after
+ * it set. A shader without any permute entries gets a single empty permutation.
+ * Nested @SubShader entries and any other entry kind are errors.
+ * Returns the index (relative to `entries`) of the last entry consumed.
+ * NOTE(review): `s` points into ctx->shaders and is used after further pushes to
+ * that array for sub shaders - confirm da_push cannot move the array here. */
+function iz
+meta_pack_shader(MetaContext *ctx, MetaShaderGroup *sg, Arena scratch, MetaEntry *entries, iz entry_count)
+{
+ assert(entries[0].kind == MetaEntryKind_Shader);
+
+ MetaBaseShader *base_shader = da_push(ctx->arena, &ctx->base_shaders);
+ MetaShader *s = da_push(ctx->arena, &ctx->shaders);
+ *da_push(ctx->arena, &sg->shaders) = (u32)da_index(s, &ctx->shaders);
+ {
+ s8_list *flag_list = da_push(ctx->arena, &ctx->flags_for_shader);
+ s->flag_list_id = (u32)da_index(flag_list, &ctx->flags_for_shader);
+ }
+
+ base_shader->shader = s;
+ /* NOTE: the file name argument is optional; more than one argument is an error */
+ if (entries->argument_count > 1) {
+ meta_entry_argument_expected(entries, s8("[file_name]"));
+ } else if (entries->argument_count == 1) {
+ base_shader->file = meta_entry_argument_expect(entries, 0, MetaEntryArgumentKind_String).string;
+ }
+ s->base_name_id = meta_pack_shader_name(ctx, entries->name, entries->location);
+
+ /* NOTE: stack of entry indices whose scope is currently open */
+ i32 stack_items[32];
+ struct { i32 *data; iz capacity; iz count; } stack = {stack_items, countof(stack_items), 0};
+
+ iz result;
+ b32 in_sub_shader = 0;
+ for (result = 0; result < entry_count; result++) {
+ MetaEntry *e = entries + result;
+ switch (e->kind) {
+ case MetaEntryKind_BeginScope:{}break;
+ case MetaEntryKind_SubShader:{
+ if (in_sub_shader) goto error;
+ in_sub_shader = 1;
+ } /* FALLTHROUGH */
+ case MetaEntryKind_PermuteBits:
+ case MetaEntryKind_Permute:
+ case MetaEntryKind_Shader:
+ {
+ *da_push(&scratch, &stack) = (i32)result;
+ /* NOTE: an entry that is not followed by '{' ends immediately */
+ if ((result + 1 < entry_count) && entries[result + 1].kind == MetaEntryKind_BeginScope)
+ break;
+ } /* FALLTHROUGH */
+ case MetaEntryKind_EndScope:{
+ i32 index = stack.data[--stack.count];
+ MetaEntry *ended = entries + index;
+ if (index == 0) {
+ assert(stack.count == 0 && ended->kind == MetaEntryKind_Shader);
+ // NOTE(rnp): emit an empty single permutation
+ if (s->permutations.count == 0)
+ da_push(ctx->arena, &s->permutations);
+ } else {
+ u32 local_flags = 0;
+ /* NOTE: only entries directly inside of the @Shader scope are expanded;
+ * deeper ones are handled by meta_emit_shader_permutations */
+ if (stack.count > 0 && entries[stack.data[stack.count - 1]].kind == MetaEntryKind_Shader) {
+ MetaShader *fill = s;
+ if (ended->kind == MetaEntryKind_SubShader) {
+ fill = da_push(ctx->arena, &ctx->shaders);
+ u32 sid = (u32)da_index(fill, &ctx->shaders);
+ *da_push(ctx->arena, &sg->shaders) = sid;
+ *da_push(ctx->arena, &base_shader->sub_shaders) = sid;
+
+ fill->flag_list_id = s->flag_list_id;
+ fill->base_name_id = meta_pack_shader_name(ctx, ended->name, ended->location);
+ local_flags = 1u << meta_commit_shader_flag(ctx, s->flag_list_id, ended->name, ended);
+ in_sub_shader = 0;
+ }
+ meta_emit_shader_permutations(ctx, scratch, fill, local_flags, ended, result - index + 1);
+ }
+ }
+ }break;
+
+ default:
+ error:
+ {
+ meta_entry_error(e, "invalid nested @%s() in @%s()\n",
+ meta_entry_kind_strings[e->kind],
+ meta_entry_kind_strings[MetaEntryKind_Shader]);
+ }break;
+ }
+ /* NOTE: the @Shader entry itself has ended */
+ if (stack.count == 0)
+ break;
+ }
+
+ return result;
+}
+
+/* Splits a packed permutation id into its kind index (upper 16 bits) and its
+ * variation index (lower 16 bits). Both indices are asserted to be in range
+ * for the tables stored in ctx. */
+function MetaPermutation
+metagen_unpack_permutation(MetaContext *ctx, u32 packed)
+{
+	MetaPermutation unpacked;
+	unpacked.kind      = (iz)(packed >> 16u);
+	unpacked.variation = (iz)(packed & 0xFFFFu);
+
+	assert(unpacked.kind < ctx->permutation_kinds.count);
+	s8_list *variations = ctx->permutations_for_kind.data + unpacked.kind;
+	assert(unpacked.variation < variations->count);
+
+	return unpacked;
+}
+
+/* Returns the name of the permutation kind referenced by a packed permutation id. */
+function s8
+metagen_permutation_kind(MetaContext *ctx, u32 packed)
+{
+	MetaPermutation unpacked = metagen_unpack_permutation(ctx, packed);
+	return ctx->permutation_kinds.data[unpacked.kind];
+}
+
+/* Returns the name of the variation (within its kind) referenced by a packed permutation id. */
+function s8
+metagen_permutation_variation(MetaContext *ctx, u32 packed)
+{
+	MetaPermutation unpacked = metagen_unpack_permutation(ctx, packed);
+	s8_list *variations = ctx->permutations_for_kind.data + unpacked.kind;
+	return variations->data[unpacked.variation];
+}
+
+/* Writes `rows` lines to `m`, each wrapped in `row_start`/`row_end`, with the text of
+ * `column_strings[column][row]` laid out left to right. Every column except the last is
+ * padded to the length of its longest entry plus one space so that the columns line up.
+ * `scratch` is received by value; the width table allocated from it is temporary. */
+function void
+metagen_push_table(MetaprogramContext *m, Arena scratch, s8 row_start, s8 row_end,
+                   s8 **column_strings, uz rows, uz columns)
+{
+	/* NOTE: only the leading columns need a width; nothing follows the last one */
+	u32 *widths = 0;
+	if (columns > 1) {
+		widths = push_array(&scratch, u32, (iz)columns - 1);
+		for (uz column = 0; column + 1 < columns; column++) {
+			s8 *cells  = column_strings[column];
+			u32 widest = widths[column];
+			for (uz row = 0; row < rows; row++)
+				widest = MAX(widest, (u32)cells[row].len);
+			widths[column] = widest;
+		}
+	}
+
+	for (uz row = 0; row < rows; row++) {
+		meta_begin_line(m, row_start);
+		for (uz column = 0; column < columns; column++) {
+			s8 cell = column_strings[column][row];
+			meta_push(m, cell);
+
+			b32 last_column = (column + 1 == columns);
+			if (!last_column) {
+				/* NOTE: one separating space plus whatever is needed to reach the column width */
+				i32 pad = 1;
+				if (widths) pad += (i32)widths[column] - (i32)cell.len;
+				meta_pad(m, ' ', pad);
+			}
+		}
+		meta_end_line(m, row_end);
+	}
+}
+
+/* Emits `typedef struct { <type> <field>; ... } <kind>;` with the field names aligned.
+ * `types` and `fields` are parallel arrays and must have the same length. */
+function void
+metagen_push_c_struct(MetaprogramContext *m, s8 kind, s8 *types, uz types_count, s8 *fields, uz fields_count)
+{
+	assert(fields_count == types_count);
+
+	s8 *member_columns[] = {types, fields};
+	meta_begin_scope(m, s8("typedef struct {"));
+	metagen_push_table(m, m->scratch, s8(""), s8(";"), member_columns, fields_count, countof(member_columns));
+	meta_end_scope(m, s8("} "), kind, s8(";\n"));
+}
+
+/* Emits one line per id of the form `<prefix><kind><id> <mid><index><suffix>`, where
+ * index counts up from 0 in array order. The ids are padded so that `mid` starts in
+ * the same column on every line. */
+function void
+metagen_push_counted_enum_body(MetaprogramContext *m, s8 kind, s8 prefix, s8 mid, s8 suffix, s8 *ids, iz ids_count)
+{
+	iz longest = 0;
+	for (iz it = 0; it < ids_count; it++) {
+		if (ids[it].len > longest)
+			longest = ids[it].len;
+	}
+
+	for (iz it = 0; it < ids_count; it++) {
+		s8  id      = ids[it];
+		i32 padding = 1 + (i32)(longest - id.len);
+
+		meta_begin_line(m, prefix, kind, id);
+		meta_pad(m, ' ', padding);
+		meta_push(m, mid);
+		meta_push_u64(m, (u64)it);
+		meta_end_line(m, suffix);
+	}
+}
+
+/* Emits `typedef enum { <kind>_<id> = <index>, ... <kind>_Count, } <kind>;`.
+ * `scratch` is received by value and only holds the temporary "<kind>_" prefix. */
+function void
+metagen_push_c_enum(MetaprogramContext *m, Arena scratch, s8 kind, s8 *ids, iz ids_count)
+{
+	s8 enumerator_prefix = push_s8_from_parts(&scratch, s8(""), kind, s8("_"));
+
+	meta_begin_scope(m, s8("typedef enum {"));
+	metagen_push_counted_enum_body(m, enumerator_prefix, s8(""), s8("= "), s8(","), ids, ids_count);
+	/* NOTE: trailing enumerator without an explicit value */
+	meta_push_line(m, enumerator_prefix, s8("Count,"));
+	meta_end_scope(m, s8("} "), kind, s8(";\n"));
+}
+
+/* Emits `typedef enum { <kind>_<id> = (1 << <index>), ... } <kind>;`.
+ * Unlike metagen_push_c_enum() no `_Count` enumerator is appended. */
+function void
+metagen_push_c_flag_enum(MetaprogramContext *m, Arena scratch, s8 kind, s8 *ids, iz ids_count)
+{
+	s8 enumerator_prefix = push_s8_from_parts(&scratch, s8(""), kind, s8("_"));
+
+	meta_begin_scope(m, s8("typedef enum {"));
+	metagen_push_counted_enum_body(m, enumerator_prefix, s8(""), s8("= (1 << "), s8("),"), ids, ids_count);
+	meta_end_scope(m, s8("} "), kind, s8(";\n"));
+}
+
+/* Emits the match vectors for every permutation of shader `s`, preceded by a `// <name>` line.
+ *
+ * Each vector is a `(i32 []){...}` literal laid out as:
+ *   - one `Beamformer<Kind>_<Variation>` enumerator per global flag of the permutation
+ *   - `-1` for each remaining slot up to `sub_field_count`
+ *   - when `has_local_flags`, the OR of the permutation's local flags as a hex literal
+ * A shader with no sub fields and no local flags gets a `0,` entry per permutation instead. */
+function void
+metagen_push_shader_derivative_vectors(MetaContext *ctx, MetaprogramContext *m, MetaShader *s,
+                                       i32 sub_field_count, b32 has_local_flags)
+{
+	meta_push_line(m, s8("// "), ctx->shader_names.data[s->base_name_id]);
+	for (iz perm = 0; perm < s->permutations.count; perm++) {
+		MetaShaderPermutation *p = s->permutations.data + perm;
+		if (!has_local_flags && sub_field_count == 0) {
+			meta_push_line(m, s8("0,"));
+			continue;
+		}
+
+		/* NOTE: the separator is keyed on how many elements were already written.
+		 * Keying it on the loop index produced a leading ", " (invalid C) whenever a
+		 * permutation had no global flags but still needed fillers or a flags word. */
+		i32 emitted = 0;
+
+		meta_begin_line(m, s8("(i32 []){"));
+		for (u8 id = 0; id < p->global_flags_count; id++) {
+			s8 kind      = metagen_permutation_kind(ctx, p->global_flags[id]);
+			s8 variation = metagen_permutation_variation(ctx, p->global_flags[id]);
+			if (emitted++ != 0) meta_push(m, s8(", "));
+			meta_push(m, s8("Beamformer"), kind, s8("_"), variation);
+		}
+
+		/* NOTE: slots this permutation does not specify */
+		for (i32 id = p->global_flags_count; id < sub_field_count; id++) {
+			if (emitted++ != 0) meta_push(m, s8(", "));
+			meta_push(m, s8("-1"));
+		}
+
+		// NOTE(rnp): local flag names
+		if (has_local_flags) {
+			u64 local_flags = 0;
+			for (u8 id = 0; id < p->local_flags_count; id++)
+				local_flags |= p->local_flags[id];
+
+			if (emitted++ != 0) meta_push(m, s8(", "));
+			meta_push(m, s8("0x"));
+			meta_push_u64_hex(m, local_flags);
+		}
+		meta_end_line(m, s8("},"));
+	}
+}
+
+/* Emits the `beamformer_shader_descriptors` table: one row per shader holding its match
+ * vector range, its sub field count, and whether it has local flags.
+ * m->scratch holds the cell strings while the table is built and is restored on return. */
+function void
+meta_push_shader_descriptors_table(MetaprogramContext *m, MetaContext *ctx)
+{
+	Arena saved_scratch = m->scratch;
+
+	s8 *columns[4];
+	for EachElement(columns, it)
+		columns[it] = push_array(&m->scratch, s8, ctx->shaders.count);
+
+	Stream sb = arena_stream(m->scratch);
+	for (iz shader = 0; shader < ctx->shaders.count; shader++) {
+		MetaShaderDescriptor *sd = ctx->shader_descriptors + shader;
+
+		/* NOTE: the first three cells are all formatted as "<number>," */
+		u64 numbers[3] = {
+			(u64)sd->first_match_vector_index,
+			(u64)sd->one_past_last_match_vector_index,
+			(u64)sd->sub_field_count,
+		};
+		for (iz column = 0; column < 3; column++) {
+			stream_append_u64(&sb, numbers[column]);
+			stream_append_byte(&sb, ',');
+			columns[column][shader] = arena_stream_commit_and_reset(&m->scratch, &sb);
+		}
+
+		if (sd->has_local_flags) columns[3][shader] = s8("1");
+		else                     columns[3][shader] = s8("0");
+	}
+
+	meta_begin_scope(m, s8("read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {"));
+	metagen_push_table(m, m->scratch, s8("{"), s8("},"), columns, (u32)ctx->shaders.count, countof(columns));
+	meta_end_scope(m, s8("};\n"));
+
+	m->scratch = saved_scratch;
+}
+
+/* Emits the tables used when (re)loading shaders:
+ *   - beamformer_reloadable_shader_infos / beamformer_reloadable_shader_files:
+ *     one entry per base shader that names a file
+ *   - beamformer_reloadable_<group>_shader_info_indices: for each shader group, the
+ *     info table indices of its file backed base shaders (a counter running across groups)
+ *   - beamformer_shader_global_header_strings: `#define <Kind>_<Variation> <n>` text per kind
+ *   - beamformer_shader_local_header_strings: `#define ShaderFlags_<Flag> (1 << n)` text
+ *     per file backed base shader, or `{0}` when it has no flags
+ *   - beamformer_shader_descriptor_header_strings: the permutation kind names
+ * Base shaders without a file are left out of the info, file, and local header tables. */
+function void
+meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx)
+{
+	///////////////////////////////
+	// NOTE(rnp): reloadable infos
+	i32 longest_name = 0;
+	for (iz it = 0; it < ctx->base_shaders.count; it++) {
+		MetaBaseShader *bs = ctx->base_shaders.data + it;
+		if (bs->file.len != 0) {
+			s8 name = ctx->shader_names.data[bs->shader->base_name_id];
+			if ((i32)name.len > longest_name)
+				longest_name = (i32)name.len;
+		}
+	}
+
+	meta_begin_scope(m, s8("read_only global BeamformerReloadableShaderInfo beamformer_reloadable_shader_infos[] = {"));
+	for (iz it = 0; it < ctx->base_shaders.count; it++) {
+		MetaBaseShader *bs = ctx->base_shaders.data + it;
+		if (bs->file.len == 0) continue;
+
+		s8 name = ctx->shader_names.data[bs->shader->base_name_id];
+		meta_begin_line(m, s8("{BeamformerShaderKind_"), name, s8(", "));
+		meta_pad(m, ' ', longest_name - (i32)name.len);
+		meta_push_u64(m, (u64)bs->sub_shaders.count);
+
+		if (bs->sub_shaders.count == 0) {
+			meta_push(m, s8(", 0"));
+		} else {
+			meta_push(m, s8(", (i32 []){"));
+			meta_push_u64(m, bs->sub_shaders.data[0]);
+			for (iz sub = 1; sub < bs->sub_shaders.count; sub++) {
+				meta_push(m, s8(", "));
+				meta_push_u64(m, bs->sub_shaders.data[sub]);
+			}
+			meta_push(m, s8("}"));
+		}
+		meta_end_line(m, s8("},"));
+	}
+	meta_end_scope(m, s8("};\n"));
+
+	meta_begin_scope(m, s8("read_only global s8 beamformer_reloadable_shader_files[] = {"));
+	for (iz it = 0; it < ctx->base_shaders.count; it++) {
+		MetaBaseShader *bs = ctx->base_shaders.data + it;
+		if (bs->file.len != 0)
+			meta_push_line(m, s8("s8_comp(\""), bs->file, s8("\"),"));
+	}
+	meta_end_scope(m, s8("};\n"));
+
+	/* NOTE: the index keeps counting across groups; it is only advanced for shaders
+	 * that are the primary shader of a file backed base shader */
+	u32 next_info_index = 0;
+	for (iz group = 0; group < ctx->shader_groups.count; group++) {
+		MetaShaderGroup *sg = ctx->shader_groups.data + group;
+
+		meta_begin_line(m, s8("read_only global i32 beamformer_reloadable_"));
+		for (iz c = 0; c < sg->name.len; c++)
+			stream_append_byte(&m->stream, TOLOWER(sg->name.data[c]));
+		meta_begin_scope(m, s8("_shader_info_indices[] = {"));
+
+		for (iz member = 0; member < sg->shaders.count; member++) {
+			MetaShader *s = ctx->shaders.data + sg->shaders.data[member];
+
+			/* TODO(rnp): store base shader list in a better format */
+			b32 reloadable = 0;
+			for (iz it = 0; it < ctx->base_shaders.count && !reloadable; it++) {
+				MetaBaseShader *bs = ctx->base_shaders.data + it;
+				reloadable = bs->file.len && bs->shader == s;
+			}
+
+			if (reloadable) {
+				meta_indent(m);
+				meta_push_u64(m, next_info_index++);
+				meta_end_line(m, s8(","));
+			}
+		}
+		meta_end_scope(m, s8("};\n"));
+	}
+
+	////////////////////////////////////
+	// NOTE(rnp): shader header strings
+	meta_begin_scope(m, s8("read_only global s8 beamformer_shader_global_header_strings[] = {"));
+	for (iz kind = 0; kind < ctx->permutation_kinds.count; kind++) {
+		s8_list *variations  = ctx->permutations_for_kind.data + kind;
+		s8       define_name = push_s8_from_parts(&m->scratch, s8(""), ctx->permutation_kinds.data[kind], s8("_"));
+
+		meta_push_line(m, s8("s8_comp(\"\""));
+		metagen_push_counted_enum_body(m, define_name, s8("\"#define "), s8(""), s8("\\n\""),
+		                               variations->data, variations->count);
+		meta_push_line(m, s8("\"\\n\"),"));
+
+		m->scratch = ctx->scratch;
+	}
+	meta_end_scope(m, s8("};\n"));
+
+	meta_begin_scope(m, s8("read_only global s8 beamformer_shader_local_header_strings[] = {"));
+	for (iz it = 0; it < ctx->base_shaders.count; it++) {
+		MetaBaseShader *bs = ctx->base_shaders.data + it;
+		if (bs->file.len == 0) continue;
+
+		s8_list *flags = ctx->flags_for_shader.data + bs->shader->flag_list_id;
+		if (flags->count == 0) {
+			meta_push_line(m, s8("{0},"));
+		} else {
+			meta_push_line(m, s8("s8_comp(\"\""));
+			metagen_push_counted_enum_body(m, s8("ShaderFlags_"), s8("\"#define "), s8("(1 << "), s8(")\\n\""),
+			                               flags->data, flags->count);
+			meta_push_line(m, s8("\"\\n\"),"));
+		}
+	}
+	meta_end_scope(m, s8("};\n"));
+
+	meta_begin_scope(m, s8("read_only global s8 beamformer_shader_descriptor_header_strings[] = {"));
+	for (iz kind = 0; kind < ctx->permutation_kinds.count; kind++) {
+		s8 kind_name = ctx->permutation_kinds.data[kind];
+		meta_push_line(m, s8("s8_comp(\""), kind_name, s8("\"),"));
+	}
+	meta_end_scope(m, s8("};\n"));
+}
+
+/* Emits the typed wrapper `beamformer_shader_<lowercased name>_match(...)` for shader `s`.
+ * The wrapper takes one `Beamformer<Kind>` parameter per global flag kind of the shader
+ * (named a, b, c, ...) plus `i32 flags` when the descriptor has local flags, and forwards
+ * them as a match vector to beamformer_shader_match() together with the descriptor's match
+ * vector range and the vector length (sub fields + local flags slot). */
+function void
+meta_push_shader_match_helper(MetaprogramContext *m, MetaContext *ctx, MetaShader *s, MetaShaderDescriptor *sd)
+{
+	s8 name         = ctx->shader_names.data[s->base_name_id];
+	iz global_count = s->global_flag_ids.count;
+
+	/* NOTE: parameters are named with a single letter starting at 'a' */
+	assert(global_count < 27);
+
+	///////////////////////
+	// NOTE: signature
+	meta_push_line(m, s8("function iz"));
+	meta_begin_line(m, s8("beamformer_shader_"));
+	for (iz c = 0; c < name.len; c++)
+		stream_append_byte(&m->stream, TOLOWER(name.data[c]));
+	meta_push(m, s8("_match("));
+
+	for (iz flag = 0; flag < global_count; flag++) {
+		s8 kind = ctx->permutation_kinds.data[s->global_flag_ids.data[flag]];
+		if (flag > 0) meta_push(m, s8(", "));
+		meta_push(m, s8("Beamformer"), kind, s8(" "));
+		stream_append_byte(&m->stream, (u8)((iz)'a' + flag));
+	}
+	if (sd->has_local_flags) {
+		if (global_count) meta_push(m, s8(", "));
+		meta_push(m, s8("i32 flags"));
+	}
+	meta_end_line(m, s8(")"));
+
+	//////////////////
+	// NOTE: body
+	meta_begin_scope(m, s8("{"));
+	/* NOTE: the opening text already carries the cast for the first element */
+	meta_begin_line(m, s8("iz result = beamformer_shader_match((i32 []){(i32)"));
+	for (iz flag = 0; flag < global_count; flag++) {
+		if (flag > 0) meta_push(m, s8(", (i32)"));
+		stream_append_byte(&m->stream, (u8)((iz)'a' + flag));
+	}
+	if (sd->has_local_flags) {
+		if (global_count) meta_push(m, s8(", "));
+		meta_push(m, s8("flags"));
+	}
+	meta_push(m, s8("}, "));
+	meta_push_u64(m, (u64)sd->first_match_vector_index);
+	meta_push(m, s8(", "));
+	meta_push_u64(m, (u64)sd->one_past_last_match_vector_index);
+	meta_push(m, s8(", "));
+	meta_push_u64(m, (u64)sd->sub_field_count + sd->has_local_flags);
+	meta_end_line(m, s8(");"));
+	meta_push_line(m, s8("return result;"));
+	meta_end_scope(m, s8("}\n"));
+}
+
+/* Writes generated/beamformer.meta.c from the loaded meta context: the permutation kind
+ * enums, the per shader flag enums, BeamformerShaderKind, the descriptor and reload info
+ * structs and tables, and the shader match helpers.
+ * Returns 1 without writing anything when needs_rebuild() returns false for the output
+ * (checked against beamformer.meta); otherwise returns the result of meta_write_and_reset(). */
+function b32
+metagen_emit_c_code(MetaContext *ctx, Arena arena)
+{
+ b32 result = 1;
+
+ os_make_directory("generated");
+ char *out = "generated/beamformer.meta.c";
+ if (!needs_rebuild(out, "beamformer.meta"))
+ return result;
+
+ build_log_generate("Core C Code");
- os_make_directory(OUTPUT("matlab/+OGLBeamformerFilter"));
- #define X(kind, ...) {OUTPUT("matlab/+OGLBeamformerFilter/" #kind ".m"), s8_comp(#kind), s8_comp(#__VA_ARGS__)},
- read_only local_persist struct {char *out; s8 class, args;} filter_table[] = {
- BEAMFORMER_FILTER_KIND_LIST(,)
+ MetaprogramContext meta_program = {.stream = arena_stream(arena), .scratch = ctx->scratch};
+ MetaprogramContext *m = &meta_program;
+
+ meta_push_line(m, s8("/* See LICENSE for license details. */\n"));
+ meta_push_line(m, s8("// GENERATED CODE\n"));
+
+ /////////////////////////
+ // NOTE(rnp): enumerants
+ /* one `Beamformer<Kind>` enum per permutation kind; scratch is reset after each one */
+ for (iz kind = 0; kind < ctx->permutation_kinds.count; kind++) {
+ s8 enum_name = push_s8_from_parts(&m->scratch, s8(""), s8("Beamformer"), ctx->permutation_kinds.data[kind]);
+ metagen_push_c_enum(m, m->scratch, enum_name, ctx->permutations_for_kind.data[kind].data,
+ ctx->permutations_for_kind.data[kind].count);
+ m->scratch = ctx->scratch;
+ }
+
+ /* one `BeamformerShader<Name>Flags` enum per base shader with a non-empty flag list */
+ for (iz shader = 0; shader < ctx->base_shaders.count; shader++) {
+ MetaShader *s = ctx->base_shaders.data[shader].shader;
+ s8_list flag_list = ctx->flags_for_shader.data[s->flag_list_id];
+ if (flag_list.count) {
+ s8 enum_name = push_s8_from_parts(&m->scratch, s8(""), s8("BeamformerShader"),
+ ctx->shader_names.data[s->base_name_id], s8("Flags"));
+ metagen_push_c_flag_enum(m, m->scratch, enum_name, flag_list.data, flag_list.count);
+ m->scratch = ctx->scratch;
+ }
+ }
+
+ {
+ s8 kind = s8("BeamformerShaderKind");
+ s8 kind_full = s8("BeamformerShaderKind_");
+ meta_begin_scope(m, s8("typedef enum {"));
+ metagen_push_counted_enum_body(m, kind_full, s8(""), s8("= "), s8(","),
+ ctx->shader_names.data, ctx->shader_names.count);
+ meta_push_line(m, kind_full, s8("Count,\n"));
+
+ /* three extra enumerators per shader group: <Group>First, <Group>Last, <Group>Count */
+ s8 *columns[2];
+ columns[0] = push_array(&m->scratch, s8, ctx->shader_groups.count * 3);
+ columns[1] = push_array(&m->scratch, s8, ctx->shader_groups.count * 3);
+
+ for (iz group = 0; group < ctx->shader_groups.count; group++) {
+ MetaShaderGroup *sg = ctx->shader_groups.data + group;
+
+ /* NOTE(review): indexes sg->shaders.data[0] and [count - 1] unconditionally, so this
+ * presumably relies on every group containing at least one shader - confirm */
+ s8 first_name = ctx->shader_names.data[ctx->shaders.data[sg->shaders.data[0]].base_name_id];
+ s8 last_name = ctx->shader_names.data[ctx->shaders.data[sg->shaders.data[sg->shaders.count - 1]].base_name_id];
+
+ columns[0][3 * group + 0] = push_s8_from_parts(&m->scratch, s8(""), kind, s8("_"), sg->name, s8("First"));
+ columns[1][3 * group + 0] = push_s8_from_parts(&m->scratch, s8(""), s8("= "), kind, s8("_"), first_name);
+
+ columns[0][3 * group + 1] = push_s8_from_parts(&m->scratch, s8(""), kind, s8("_"), sg->name, s8("Last"));
+ columns[1][3 * group + 1] = push_s8_from_parts(&m->scratch, s8(""),s8("= "), kind, s8("_"), last_name);
+
+ columns[0][3 * group + 2] = push_s8_from_parts(&m->scratch, s8(""), kind, s8("_"), sg->name, s8("Count"));
+ Stream sb = arena_stream(m->scratch);
+ stream_append_s8(&sb, s8("= "));
+ stream_append_u64(&sb, (u64)sg->shaders.count);
+ columns[1][3 * group + 2] = arena_stream_commit(&m->scratch, &sb);
+ }
+ metagen_push_table(m, m->scratch, s8(""), s8(","), columns, (uz)ctx->shader_groups.count * 3, 2);
+
+ meta_end_scope(m, s8("} "), kind, s8(";\n"));
+ m->scratch = ctx->scratch;
+ }
+
+ //////////////////////
+ // NOTE(rnp): structs
+ {
+ s8 name = s8_comp("BeamformerShaderDescriptor");
+ s8 types[] = {s8_comp("i32"), s8_comp("i32"), s8_comp("i32"), s8_comp("b32")};
+ s8 names[] = {
+ s8_comp("first_match_vector_index"),
+ s8_comp("one_past_last_match_vector_index"),
+ s8_comp("match_vector_length"),
+ s8_comp("has_local_flags"),
 };
- #undef X
+ metagen_push_c_struct(m, name, types, countof(types), names, countof(names));
+ }
- s8_list members = {0};
- for EachElement(filter_table, filter) {
- typeof(*filter_table) *f = filter_table + filter;
- members.count = 0;
- s8_list_from_s8(&members, &scratch, f->args);
- meta_begin_scope(&m, s8("classdef "), f->class, s8(" < OGLBeamformerFilter.BaseFilter"));
-
- meta_begin_scope(&m, s8("properties"));
- for (iz it = 0; it < members.count; it++)
- meta_push_matlab_property(&m, members.data[it], 1);
- meta_end_scope(&m, s8("end"));
-
- meta_begin_scope(&m, s8("methods"));
- meta_begin_line(&m, s8("function obj = "), f->class, s8("("));
- for (iz it = 0; it < members.count; it++)
- meta_push(&m, it > 0 ? s8(", ") : s8(""), members.data[it]);
- meta_end_line(&m, s8(")"));
-
- m.indentation_level++;
- for (iz it = 0; it < members.count; it++)
- meta_push_line(&m, s8("obj."), members.data[it], s8(" = "), members.data[it], s8(";"));
- result &= meta_end_and_write_matlab(&m, f->out);
+ {
+ s8 name = s8_comp("BeamformerReloadableShaderInfo");
+ s8 types[] = {s8_comp("BeamformerShaderKind"), s8_comp("i32"), s8_comp("i32 *")};
+ s8 names[] = {
+ s8_comp("kind"),
+ s8_comp("sub_shader_descriptor_index_count"),
+ s8_comp("sub_shader_descriptor_indices"),
+ };
+ metagen_push_c_struct(m, name, types, countof(types), names, countof(names));
+ }
+
+ ///////////////////////////////////////
+ // NOTE(rnp): shader descriptor tables
+ /* running total of permutations over all shaders; emitted as beamformer_match_vectors_count */
+ i32 match_vectors_count = 0;
+ meta_begin_scope(m, s8("read_only global i32 *beamformer_shader_match_vectors[] = {"));
+ for (iz shader = 0; shader < ctx->shaders.count; shader++) {
+ MetaShader *s = ctx->shaders.data + shader;
+ MetaShaderDescriptor *sd = ctx->shader_descriptors + shader;
+ metagen_push_shader_derivative_vectors(ctx, m, s, sd->sub_field_count, sd->has_local_flags);
+ match_vectors_count += (i32)s->permutations.count;
+ }
+ meta_end_scope(m, s8("};"));
+ meta_begin_line(m, s8("#define beamformer_match_vectors_count ("));
+ meta_push_u64(m, (u64)match_vectors_count);
+ meta_end_line(m, s8(")\n"));
+
+ meta_push_shader_descriptors_table(m, ctx);
+
+ /////////////////////////////////
+ // NOTE(rnp): shader info tables
+ meta_begin_scope(m, s8("read_only global s8 beamformer_shader_names[] = {"));
+ metagen_push_table(m, m->scratch, s8("s8_comp(\""), s8("\"),"), &ctx->shader_names.data,
+ (uz)ctx->shader_names.count, 1);
+ meta_end_scope(m, s8("};\n"));
+
+ meta_push_shader_reload_info(m, ctx);
+
+ /* per shader: the permutation kind ids it uses as global flags, or 0 when it has none */
+ meta_begin_scope(m, s8("read_only global i32 *beamformer_shader_header_vectors[] = {"));
+ for (iz shader = 0; shader < ctx->shaders.count; shader++) {
+ MetaShader *s = ctx->shaders.data + shader;
+
+ if (s->global_flag_ids.count) {
+ meta_begin_line(m, s8("(i32 []){"));
+ for (iz id = 0; id < s->global_flag_ids.count; id++) {
+ if (id != 0) meta_push(m, s8(", "));
+ meta_push_u64(m, s->global_flag_ids.data[id]);
+ }
+ meta_end_line(m, s8("},"));
+ } else {
+ meta_push_line(m, s8("0,"));
 }
+ }
+ meta_end_scope(m, s8("};\n"));
+
+ //////////////////////////////////////
+ // NOTE(rnp): shader matching helpers
+ /* the generated matcher scores each candidate vector by its number of equal elements and
+ * keeps the first index with a strictly higher score; first_index is returned when no
+ * candidate scores above 0 */
+ meta_push_line(m, s8("function iz"));
+ meta_push_line(m, s8("beamformer_shader_match(i32 *match_vector, i32 first_index, i32 one_past_last_index, i32 vector_length)"));
+ meta_begin_scope(m, s8("{"));
+ meta_push_line(m, s8("iz result = first_index;"));
+ meta_push_line(m, s8("i32 best_score = 0;"));
+ meta_push_line(m, s8("for (i32 index = first_index; index < one_past_last_index; index++)"));
+ meta_begin_scope(m, s8("{"));
+ meta_push_line(m, s8("i32 score = 0;"));
+ meta_push_line(m, s8("i32 *v = beamformer_shader_match_vectors[index];"));
+ meta_begin_scope(m, s8("for (i32 i = 0; i < vector_length; i++) {"));
+ meta_begin_scope(m, s8("if (match_vector[i] == v[i]) {"));
+ meta_push_line(m, s8("score++;"));
+ meta_end_scope(m, s8("}"));
+ meta_end_scope(m, s8("}"));
+ meta_begin_scope(m, s8("if (best_score < score) {"));
+ meta_push_line(m, s8("result = index;"));
+ meta_push_line(m, s8("best_score = score;"));
+ meta_end_scope(m, s8("}"));
+ meta_end_scope(m, s8("}"));
+ meta_push_line(m, s8("return result;"));
+ meta_end_scope(m, s8("}\n"));
+
+ /* typed wrappers are only emitted for shaders that have something to match on */
+ for (iz shader = 0; shader < ctx->shaders.count; shader++) {
+ MetaShader *s = ctx->shaders.data + shader;
+ MetaShaderDescriptor *sd = ctx->shader_descriptors + shader;
+ if (sd->sub_field_count || sd->has_local_flags)
+ meta_push_shader_match_helper(m, ctx, s, sd);
+ }
- meta_begin_matlab_class(&m, "BaseFilter");
- meta_begin_scope(&m, s8("methods"));
- meta_begin_scope(&m, s8("function out = Flatten(obj)"));
- meta_push_line(&m, s8("fields = struct2cell(struct(obj));"));
- meta_push_line(&m, s8("out = zeros(1, numel(fields));"));
- meta_begin_scope(&m, s8("for i = 1:numel(fields)"));
- meta_push_line(&m, s8("out(i) = fields{i};"));
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/+OGLBeamformerFilter/BaseFilter.m"));
-
- #define X(name, __t, __s, elements, ...) meta_push_line(&m, s8(#name "(1," #elements ")"));
- meta_begin_matlab_class(&m, "OGLBeamformerParameters");
- meta_begin_scope(&m, s8("properties"));
- BEAMFORMER_PARAMS_HEAD
- BEAMFORMER_UI_PARAMS
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerParameters.m"));
-
- meta_begin_matlab_class(&m, "OGLBeamformerParametersHead");
- meta_begin_scope(&m, s8("properties"));
- BEAMFORMER_PARAMS_HEAD
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerParametersHead.m"));
-
- meta_begin_matlab_class(&m, "OGLBeamformerParametersUI");
- meta_begin_scope(&m, s8("properties"));
- BEAMFORMER_UI_PARAMS
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerParametersUI.m"));
- #undef X
+ //fprintf(stderr, "%.*s\n", (i32)m.stream.widx, m.stream.data);
- #define X(name, __t, __s, elements, ...) meta_push_matlab_property(&m, s8(#name), elements);
- meta_begin_matlab_class(&m, "OGLBeamformerLiveImagingParameters");
- meta_begin_scope(&m, s8("properties"));
- BEAMFORMER_LIVE_IMAGING_PARAMETERS_LIST
- result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerLiveImagingParameters.m"));
- #undef X
+ result = meta_write_and_reset(m, out);
+
+ return result;
+}
+
+/* Generates the MATLAB binding classes under OUTPUT("matlab"): the enumeration classes
+ * built from the X-macro lists of the parameters header, the +OGLBeamformerFilter package,
+ * the parameter classes, and the enumerations derived from the meta info (DataKind,
+ * ShaderStage, SamplingModes).
+ * Returns 1 without writing anything when needs_rebuild() returns false. Otherwise returns
+ * 0 if any file failed to write or if a required enum/shader group is missing from ctx. */
+function b32
+metagen_emit_matlab_code(MetaContext *ctx, Arena arena)
+{
+	b32 result = 1;
+	/* NOTE: DataKind, ShaderStage, and SamplingModes below are generated from the meta info,
+	 * so the bindings are also stale when beamformer.meta changes */
+	if (!needs_rebuild(OUTPUT("matlab/OGLBeamformerFilterKind.m"), "beamformer_parameters.h", "beamformer.meta"))
+		return result;
+
+	build_log_generate("MATLAB Bindings");
+	/* TODO(rnp): recreate/clear directory incase these file names change */
+	os_make_directory(OUTPUT("matlab"));
+
+	MetaprogramContext meta_program = {.stream = arena_stream(arena), .scratch = ctx->scratch};
+	MetaprogramContext *m = &meta_program;
+
+	#define X(name, flag, ...) meta_push_line(m, s8(#name " (" str(flag) ")"));
+	meta_begin_matlab_class(m, "OGLBeamformerLiveFeedbackFlags", "int32");
+	meta_begin_scope(m, s8("enumeration"));
+	BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerLiveFeedbackFlags.m"));
+	#undef X
+
+	#define X(kind, ...) meta_push_matlab_enum_with_value(m, s8(#kind), BeamformerFilterKind_## kind);
+	meta_begin_matlab_class(m, "OGLBeamformerFilterKind", "int32");
+	meta_begin_scope(m, s8("enumeration"));
+	BEAMFORMER_FILTER_KIND_LIST(,)
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerFilterKind.m"));
+	#undef X
+
+	#define X(kind, ...) meta_push_matlab_enum_with_value(m, s8(#kind), BeamformerTransmitMode_## kind);
+	meta_begin_matlab_class(m, "OGLBeamformerTransmitModes", "int32");
+	meta_begin_scope(m, s8("enumeration"));
+	TRANSMIT_MODES_LIST
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerTransmitModes.m"));
+	#undef X
+
+	#define X(kind, ...) meta_push_matlab_enum_with_value(m, s8(#kind), BeamformerReceiveMode_## kind);
+	meta_begin_matlab_class(m, "OGLBeamformerReceiveModes", "int32");
+	meta_begin_scope(m, s8("enumeration"));
+	RECEIVE_MODES_LIST
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerReceiveModes.m"));
+	#undef X
+
+	/* NOTE: one class per filter kind; the X-macro arguments become its properties
+	 * and its constructor parameters */
+	os_make_directory(OUTPUT("matlab/+OGLBeamformerFilter"));
+	#define X(kind, ...) {OUTPUT("matlab/+OGLBeamformerFilter/" #kind ".m"), s8_comp(#kind), s8_comp(#__VA_ARGS__)},
+	read_only local_persist struct {char *out; s8 class, args;} filter_table[] = {
+		BEAMFORMER_FILTER_KIND_LIST(,)
+	};
+	#undef X
+
+	s8_list members = {0};
+	for EachElement(filter_table, filter) {
+		typeof(*filter_table) *f = filter_table + filter;
+		members.count = 0;
+		s8_list_from_s8(&members, &m->scratch, f->args);
+		meta_begin_scope(m, s8("classdef "), f->class, s8(" < OGLBeamformerFilter.BaseFilter"));
+
+		meta_begin_scope(m, s8("properties"));
+		for (iz it = 0; it < members.count; it++)
+			meta_push_matlab_property(m, members.data[it], 1);
+		meta_end_scope(m, s8("end"));
+
+		meta_begin_scope(m, s8("methods"));
+		meta_begin_line(m, s8("function obj = "), f->class, s8("("));
+		for (iz it = 0; it < members.count; it++)
+			meta_push(m, it > 0 ? s8(", ") : s8(""), members.data[it]);
+		meta_end_line(m, s8(")"));
+
+		m->indentation_level++;
+		for (iz it = 0; it < members.count; it++)
+			meta_push_line(m, s8("obj."), members.data[it], s8(" = "), members.data[it], s8(";"));
+		result &= meta_end_and_write_matlab(m, f->out);
+	}
+	m->scratch = ctx->scratch;
+
+	meta_begin_matlab_class(m, "BaseFilter");
+	meta_begin_scope(m, s8("methods"));
+	meta_begin_scope(m, s8("function out = Flatten(obj)"));
+	meta_push_line(m, s8("fields = struct2cell(struct(obj));"));
+	meta_push_line(m, s8("out = zeros(1, numel(fields));"));
+	meta_begin_scope(m, s8("for i = 1:numel(fields)"));
+	meta_push_line(m, s8("out(i) = fields{i};"));
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/+OGLBeamformerFilter/BaseFilter.m"));
+
+	#define X(name, __t, __s, elements, ...) meta_push_line(m, s8(#name "(1," #elements ")"));
+	meta_begin_matlab_class(m, "OGLBeamformerParameters");
+	meta_begin_scope(m, s8("properties"));
+	BEAMFORMER_PARAMS_HEAD
+	BEAMFORMER_UI_PARAMS
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerParameters.m"));
+
+	meta_begin_matlab_class(m, "OGLBeamformerParametersHead");
+	meta_begin_scope(m, s8("properties"));
+	BEAMFORMER_PARAMS_HEAD
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerParametersHead.m"));
+
+	meta_begin_matlab_class(m, "OGLBeamformerParametersUI");
+	meta_begin_scope(m, s8("properties"));
+	BEAMFORMER_UI_PARAMS
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerParametersUI.m"));
+	#undef X
+
+	#define X(name, __t, __s, elements, ...) meta_push_matlab_property(m, s8(#name), elements);
+	meta_begin_matlab_class(m, "OGLBeamformerLiveImagingParameters");
+	meta_begin_scope(m, s8("properties"));
+	BEAMFORMER_LIVE_IMAGING_PARAMETERS_LIST
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerLiveImagingParameters.m"));
+	#undef X
+
+	///////////////////////////////////////////////
+	// NOTE: enumerations derived from the meta info
+	meta_begin_matlab_class(m, "OGLBeamformerDataKind", "int32");
+	meta_begin_scope(m, s8("enumeration"));
+	{
+		iz index = meta_lookup_string_slow(&ctx->permutation_kinds, s8("DataKind"));
+		if (index != -1) {
+			s8_list *kinds = ctx->permutations_for_kind.data + index;
+			metagen_push_counted_enum_body(m, s8(""), s8(""), s8("("), s8(")"), kinds->data, kinds->count);
+		} else {
+			build_log_failure("failed to find DataKind enum in meta info\n");
+		}
+		result &= index != -1;
 	}
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerDataKind.m"));
+	m->scratch = ctx->scratch;
+
+	meta_begin_matlab_class(m, "OGLBeamformerShaderStage", "int32");
+	meta_begin_scope(m, s8("enumeration"));
+	{
+		iz index = -1;
+		for (iz group = 0; group < ctx->shader_groups.count; group++) {
+			if (s8_equal(ctx->shader_groups.data[group].name, s8("Compute"))) {
+				index = group;
+				break;
+			}
+		}
+		if (index != -1) {
+			MetaShaderGroup *sg = ctx->shader_groups.data + index;
+			if (sg->shaders.count > 0) {
+				/* TODO(rnp): this assumes that the shaders are sequential */
+				/* NOTE: the names start at the group's own first shader rather than the
+				 * first shader overall; the emitted values are still counted from 0 */
+				s8 *names = ctx->shader_names.data + ctx->shaders.data[sg->shaders.data[0]].base_name_id;
+				metagen_push_counted_enum_body(m, s8(""), s8(""), s8("("), s8(")"), names, sg->shaders.count);
+			}
+		} else {
+			build_log_failure("failed to find Compute shader group in meta info\n");
+		}
+		result &= index != -1;
+	}
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerShaderStage.m"));
+
+	meta_begin_matlab_class(m, "OGLBeamformerSamplingModes", "int32");
+	meta_begin_scope(m, s8("enumeration"));
+	{
+		iz index = meta_lookup_string_slow(&ctx->permutation_kinds, s8("SamplingMode"));
+		if (index != -1) {
+			s8_list *kinds = ctx->permutations_for_kind.data + index;
+			/* NOTE: every enumerator name is emitted with an "m" prefix */
+			metagen_push_counted_enum_body(m, s8(""), s8("m"), s8("("), s8(")"), kinds->data, kinds->count);
+		} else {
+			build_log_failure("failed to find SamplingMode enum in meta info\n");
+		}
+		result &= index != -1;
+	}
+	result &= meta_end_and_write_matlab(m, OUTPUT("matlab/OGLBeamformerSamplingModes.m"));
+
+	return result;
+}
+
+/* Generates OUTPUT("ogl_beamformer_lib.h"): a generated prelude (the BeamformerDataKind
+ * enum and the BeamformerShaderKind_ComputeCount define taken from the meta info) followed
+ * by the contents of beamformer_parameters.h and helpers/ogl_beamformer_lib_base.h.
+ * Returns 1 without writing anything when needs_rebuild() returns false. Returns 0, and
+ * leaves the output unwritten, when the DataKind enum or the Compute shader group is
+ * missing from ctx; otherwise returns the result of meta_write_and_reset(). */
+function b32
+metagen_emit_helper_library_header(MetaContext *ctx, Arena arena)
+{
+	b32 result = 1;
+	char *out = OUTPUT("ogl_beamformer_lib.h");
+	/* NOTE: every input that ends up in the header is listed, including the parameters
+	 * header which is copied in verbatim below */
+	if (!needs_rebuild(out, "beamformer_parameters.h", "helpers/ogl_beamformer_lib_base.h", "beamformer.meta"))
+		return result;
+
+	build_log_generate("Helper Library Header");
+
+	s8 parameters_header = os_read_whole_file(&arena, "beamformer_parameters.h");
+	s8 base_header = os_read_whole_file(&arena, "helpers/ogl_beamformer_lib_base.h");
+
+	MetaprogramContext meta_program = {.stream = arena_stream(arena), .scratch = ctx->scratch};
+	MetaprogramContext *m = &meta_program;
+
+	meta_push_line(m, s8("/* See LICENSE for license details. */\n"));
+	meta_push_line(m, s8("// GENERATED CODE\n"));
+
+	{
+		iz index = meta_lookup_string_slow(&ctx->permutation_kinds, s8("DataKind"));
+		if (index != -1) {
+			s8 enum_name = push_s8_from_parts(&m->scratch, s8(""), s8("Beamformer"), ctx->permutation_kinds.data[index]);
+			metagen_push_c_enum(m, m->scratch, enum_name, ctx->permutations_for_kind.data[index].data,
+			                    ctx->permutations_for_kind.data[index].count);
+			m->scratch = ctx->scratch;
+		} else {
+			build_log_failure("failed to find DataKind in meta info\n");
+		}
+		/* NOTE: a missing enum is a failed build, same as in the MATLAB emitter */
+		result &= index != -1;
+	}
+
+	{
+		iz index = -1;
+		for (iz group = 0; group < ctx->shader_groups.count; group++) {
+			if (s8_equal(ctx->shader_groups.data[group].name, s8("Compute"))) {
+				index = group;
+				break;
+			}
+		}
+		if (index != -1) {
+			MetaShaderGroup *sg = ctx->shader_groups.data + index;
+			meta_begin_line(m, s8("#define BeamformerShaderKind_ComputeCount ("));
+			meta_push_u64(m, (u64)sg->shaders.count);
+			meta_end_line(m, s8(")\n"));
+		} else {
+			build_log_failure("failed to find Compute shader group in meta info\n");
+		}
+		result &= index != -1;
+	}
+
+	/* NOTE: an incomplete header is not written out; writing it would make the output look
+	 * up to date to needs_rebuild() on the next run and hide the failure */
+	if (result) {
+		meta_push(m, parameters_header, base_header);
+		result &= meta_write_and_reset(m, out);
+	}
 	return result;
 }
+/* Builds a MetaContext from "beamformer.meta": the context and everything it references
+ * are allocated from `arena`, with a 1MB sub arena reserved as ctx->scratch.
+ * Returns 0 when setjmp(compiler_jmp_buf) returns non-zero (marked below as a compiler
+ * error); otherwise returns the context with shader groups, shaders, and one
+ * MetaShaderDescriptor per shader filled in. ctx->arena is cleared before returning. */
+function MetaContext *
+metagen_load_context(Arena *arena)
+{
+ if (setjmp(compiler_jmp_buf)) {
+ /* NOTE(rnp): compiler error */
+ return 0;
+ }
+
+ MetaContext *ctx = push_struct(arena, MetaContext);
+ ctx->scratch = sub_arena(arena, MB(1), 16);
+ ctx->arena = arena;
+
+ MetaContext *result = ctx;
+
+ Arena scratch = ctx->scratch;
+ MetaEntryStack entries = meta_entry_stack_from_file(ctx->arena, scratch, "beamformer.meta");
+
+ /* stack of entry indices that opened the currently active scopes */
+ i32 stack_items[32];
+ struct { i32 *data; iz capacity; iz count; } stack = {stack_items, countof(stack_items), 0};
+
+ MetaShaderGroup *current_shader_group = 0;
+ for (iz i = 0; i < entries.count; i++) {
+ MetaEntry *e = entries.data + i;
+ //if (e->kind == MetaEntryKind_EndScope) depth--;
+ //meta_entry_print(e, depth, -1);
+ //if (e->kind == MetaEntryKind_BeginScope) depth++;
+ //continue;
+
+ switch (e->kind) {
+ /* the scope is attributed to the entry right before the BeginScope (index i - 1) */
+ case MetaEntryKind_BeginScope:{ *da_push(&scratch, &stack) = (i32)(i - 1); }break;
+ case MetaEntryKind_EndScope:{
+ /* NOTE(review): pops without checking stack.count; presumably the entry stack is
+ * guaranteed to have balanced scopes by the time it gets here - confirm */
+ i32 index = stack.data[--stack.count];
+ MetaEntry *ended = entries.data + index;
+ switch (ended->kind) {
+ case MetaEntryKind_ShaderGroup:{ current_shader_group = 0; }break;
+ default:{}break;
+ }
+ }break;
+ case MetaEntryKind_ShaderGroup:{
+ MetaShaderGroup *sg = da_push(ctx->arena, &ctx->shader_groups);
+ sg->name = e->name;
+ current_shader_group = sg;
+ }break;
+ case MetaEntryKind_Shader:{
+ /* a shader outside of any shader group is reported through the shared error path */
+ if (!current_shader_group) goto error;
+ /* the return value is added to i so the loop resumes after the entries handled there */
+ i += meta_pack_shader(ctx, current_shader_group, scratch, e, entries.count - i);
+ }break;
+
+ error:
+ default:
+ {
+ meta_entry_error(e, "invalid @%s() in global scope\n", meta_entry_kind_strings[e->kind]);
+ }break;
+ }
+ }
+
+ ctx->shader_descriptors = push_array(ctx->arena, MetaShaderDescriptor, ctx->shaders.count);
+ {
+ /* each shader gets the half open range [first, one_past_last) of a running permutation count */
+ i32 match_vectors_count = 0;
+ for (iz shader = 0; shader < ctx->shaders.count; shader++) {
+ MetaShader *s = ctx->shaders.data + shader;
+ MetaShaderDescriptor *sd = ctx->shader_descriptors + shader;
+
+ /* has_local_flags is set when any permutation of the shader carries a local flag */
+ for (iz perm = 0; perm < s->permutations.count; perm++)
+ sd->has_local_flags |= s->permutations.data[perm].local_flags_count != 0;
+ sd->sub_field_count = (i32)s->global_flag_ids.count;
+ sd->first_match_vector_index = match_vectors_count;
+ match_vectors_count += (i32)s->permutations.count;
+ sd->one_past_last_match_vector_index = match_vectors_count;
+ }
+ }
+
+ /* result aliases ctx, so this clears ctx->arena (the pointer to the caller's arena) */
+ result->arena = 0;
+ return result;
+}
+
i32
main(i32 argc, char *argv[])
{
@@ -967,10 +2434,17 @@ main(i32 argc, char *argv[])
Arena arena = os_alloc_arena(MB(8));
check_rebuild_self(arena, argc, argv);
- Options options = parse_options(argc, argv);
-
os_make_directory(OUTDIR);
+ MetaContext *meta = metagen_load_context(&arena);
+ if (!meta) return 1;
+
+ result &= metagen_emit_c_code(meta, arena);
+ result &= metagen_emit_helper_library_header(meta, arena);
+ result &= metagen_emit_matlab_code(meta, arena);
+
+ Options options = parse_options(argc, argv);
+
CommandList c = cmd_base(&arena, &options);
if (!check_build_raylib(arena, c, options.debug)) return 1;
@@ -980,7 +2454,6 @@ main(i32 argc, char *argv[])
/////////////////
// helpers/tests
- result &= build_matlab_bindings(arena);
result &= build_helper_library(arena, c);
if (options.tests) result &= build_tests(arena, c);
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -0,0 +1,278 @@
+/* See LICENSE for license details. */
+
+// GENERATED CODE
+
+typedef enum {
+ BeamformerDataKind_Int16 = 0,
+ BeamformerDataKind_Int16Complex = 1,
+ BeamformerDataKind_Float32 = 2,
+ BeamformerDataKind_Float32Complex = 3,
+ BeamformerDataKind_Count,
+} BeamformerDataKind;
+
+typedef enum {
+ BeamformerSamplingMode_2X = 0,
+ BeamformerSamplingMode_4X = 1,
+ BeamformerSamplingMode_Count,
+} BeamformerSamplingMode;
+
+typedef enum {
+ BeamformerShaderFilterFlags_MapChannels = (1 << 0),
+ BeamformerShaderFilterFlags_ComplexFilter = (1 << 1),
+ BeamformerShaderFilterFlags_Demodulate = (1 << 2),
+} BeamformerShaderFilterFlags;
+
+typedef enum {
+ BeamformerShaderDASFlags_Fast = (1 << 0),
+ BeamformerShaderDASFlags_Sparse = (1 << 1),
+} BeamformerShaderDASFlags;
+
+typedef enum {
+ BeamformerShaderKind_CudaDecode = 0,
+ BeamformerShaderKind_CudaHilbert = 1,
+ BeamformerShaderKind_Decode = 2,
+ BeamformerShaderKind_Filter = 3,
+ BeamformerShaderKind_Demodulate = 4,
+ BeamformerShaderKind_DAS = 5,
+ BeamformerShaderKind_MinMax = 6,
+ BeamformerShaderKind_Sum = 7,
+ BeamformerShaderKind_Render3D = 8,
+ BeamformerShaderKind_Count, // NOTE: one past the last kind; kinds are used to index beamformer_shader_names
+
+ BeamformerShaderKind_ComputeFirst = BeamformerShaderKind_CudaDecode, // NOTE: compute kinds are the range [ComputeFirst, ComputeLast]
+ BeamformerShaderKind_ComputeLast = BeamformerShaderKind_Sum,
+ BeamformerShaderKind_ComputeCount = 8, // ComputeLast - ComputeFirst + 1
+ BeamformerShaderKind_RenderFirst = BeamformerShaderKind_Render3D, // NOTE: render kinds are the range [RenderFirst, RenderLast]
+ BeamformerShaderKind_RenderLast = BeamformerShaderKind_Render3D,
+ BeamformerShaderKind_RenderCount = 1, // RenderLast - RenderFirst + 1
+} BeamformerShaderKind;
+
+typedef struct {
+ i32 first_match_vector_index; // first entry belonging to this shader in beamformer_shader_match_vectors
+ i32 one_past_last_match_vector_index; // exclusive end of that range
+ i32 match_vector_length; // NOTE(review): looks like the entries per vector excluding a trailing flags entry - confirm
+ b32 has_local_flags; // NOTE(review): presumably non-zero when the vectors end with a shader local flags entry - confirm
+} BeamformerShaderDescriptor;
+
+typedef struct {
+ BeamformerShaderKind kind;
+ i32 sub_shader_descriptor_index_count;
+ i32 * sub_shader_descriptor_indices;
+} BeamformerReloadableShaderInfo;
+
+read_only global i32 *beamformer_shader_match_vectors[] = {
+ // CudaDecode
+ 0,
+ // CudaHilbert
+ 0,
+ // Decode
+ (i32 []){BeamformerDataKind_Int16},
+ (i32 []){BeamformerDataKind_Int16Complex},
+ (i32 []){BeamformerDataKind_Float32},
+ (i32 []){BeamformerDataKind_Float32Complex},
+ // Filter
+ (i32 []){BeamformerDataKind_Int16Complex, 0x00},
+ (i32 []){BeamformerDataKind_Int16Complex, 0x01},
+ (i32 []){BeamformerDataKind_Int16Complex, 0x02},
+ (i32 []){BeamformerDataKind_Int16Complex, 0x03},
+ (i32 []){BeamformerDataKind_Float32, 0x00},
+ (i32 []){BeamformerDataKind_Float32, 0x01},
+ (i32 []){BeamformerDataKind_Float32, 0x02},
+ (i32 []){BeamformerDataKind_Float32, 0x03},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x00},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x01},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x02},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x03},
+ // Demodulate
+ (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x04},
+ (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x05},
+ (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x06},
+ (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_2X, 0x07},
+ (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x04},
+ (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x05},
+ (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x06},
+ (i32 []){BeamformerDataKind_Int16, BeamformerSamplingMode_4X, 0x07},
+ (i32 []){BeamformerDataKind_Int16, -1, 0x04},
+ (i32 []){BeamformerDataKind_Int16, -1, 0x05},
+ (i32 []){BeamformerDataKind_Int16, -1, 0x06},
+ (i32 []){BeamformerDataKind_Int16, -1, 0x07},
+ (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x04},
+ (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x05},
+ (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x06},
+ (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_2X, 0x07},
+ (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x04},
+ (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x05},
+ (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x06},
+ (i32 []){BeamformerDataKind_Float32, BeamformerSamplingMode_4X, 0x07},
+ (i32 []){BeamformerDataKind_Float32, -1, 0x04},
+ (i32 []){BeamformerDataKind_Float32, -1, 0x05},
+ (i32 []){BeamformerDataKind_Float32, -1, 0x06},
+ (i32 []){BeamformerDataKind_Float32, -1, 0x07},
+ // DAS
+ (i32 []){BeamformerDataKind_Float32, 0x00},
+ (i32 []){BeamformerDataKind_Float32, 0x01},
+ (i32 []){BeamformerDataKind_Float32, 0x02},
+ (i32 []){BeamformerDataKind_Float32, 0x03},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x00},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x01},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x02},
+ (i32 []){BeamformerDataKind_Float32Complex, 0x03},
+ // MinMax
+ 0,
+ // Sum
+ 0,
+ // Render3D
+ 0,
+};
+#define beamformer_match_vectors_count (53)
+
+read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {
+ {0, 1, 0, 0},
+ {1, 2, 0, 0},
+ {2, 6, 1, 0},
+ {6, 18, 1, 1},
+ {18, 42, 2, 1},
+ {42, 50, 1, 1},
+ {50, 51, 0, 0},
+ {51, 52, 0, 0},
+ {52, 53, 0, 0},
+};
+
+read_only global s8 beamformer_shader_names[] = {
+ s8_comp("CudaDecode"),
+ s8_comp("CudaHilbert"),
+ s8_comp("Decode"),
+ s8_comp("Filter"),
+ s8_comp("Demodulate"),
+ s8_comp("DAS"),
+ s8_comp("MinMax"),
+ s8_comp("Sum"),
+ s8_comp("Render3D"),
+};
+
+read_only global BeamformerReloadableShaderInfo beamformer_reloadable_shader_infos[] = {
+ {BeamformerShaderKind_Decode, 0, 0},
+ {BeamformerShaderKind_Filter, 1, (i32 []){4}},
+ {BeamformerShaderKind_DAS, 0, 0},
+ {BeamformerShaderKind_MinMax, 0, 0},
+ {BeamformerShaderKind_Sum, 0, 0},
+ {BeamformerShaderKind_Render3D, 0, 0},
+};
+
+read_only global s8 beamformer_reloadable_shader_files[] = {
+ s8_comp("decode.glsl"),
+ s8_comp("filter.glsl"),
+ s8_comp("das.glsl"),
+ s8_comp("min_max.glsl"),
+ s8_comp("sum.glsl"),
+ s8_comp("render_3d.frag.glsl"),
+};
+
+read_only global i32 beamformer_reloadable_compute_shader_info_indices[] = {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+};
+
+read_only global i32 beamformer_reloadable_render_shader_info_indices[] = {
+ 5,
+};
+
+read_only global s8 beamformer_shader_global_header_strings[] = {
+ s8_comp(""
+ "#define DataKind_Int16 0\n"
+ "#define DataKind_Int16Complex 1\n"
+ "#define DataKind_Float32 2\n"
+ "#define DataKind_Float32Complex 3\n"
+ "\n"),
+ s8_comp(""
+ "#define SamplingMode_2X 0\n"
+ "#define SamplingMode_4X 1\n"
+ "\n"),
+};
+
+read_only global s8 beamformer_shader_local_header_strings[] = {
+ {0},
+ s8_comp(""
+ "#define ShaderFlags_MapChannels (1 << 0)\n"
+ "#define ShaderFlags_ComplexFilter (1 << 1)\n"
+ "#define ShaderFlags_Demodulate (1 << 2)\n"
+ "\n"),
+ s8_comp(""
+ "#define ShaderFlags_Fast (1 << 0)\n"
+ "#define ShaderFlags_Sparse (1 << 1)\n"
+ "\n"),
+ {0},
+ {0},
+ {0},
+};
+
+read_only global s8 beamformer_shader_descriptor_header_strings[] = {
+ s8_comp("DataKind"),
+ s8_comp("SamplingMode"),
+};
+
+read_only global i32 *beamformer_shader_header_vectors[] = {
+ 0,
+ 0,
+ (i32 []){0},
+ (i32 []){0},
+ (i32 []){0, 1},
+ (i32 []){0},
+ 0,
+ 0,
+ 0,
+};
+
+function iz
+beamformer_shader_match(i32 *match_vector, i32 first_index, i32 one_past_last_index, i32 vector_length)
+{ // NOTE: returns the index in [first_index, one_past_last_index) whose vector equals match_vector in the most positions
+ iz result = first_index; // NOTE: also the answer when no candidate scores above 0
+ i32 best_score = 0;
+ for (i32 index = first_index; index < one_past_last_index; index++)
+ {
+ i32 score = 0; // NOTE: number of positions where the candidate equals match_vector
+ i32 *v = beamformer_shader_match_vectors[index]; // NOTE: may be 0 for shaders without vectors; only read when vector_length > 0
+ for (i32 i = 0; i < vector_length; i++) {
+ if (match_vector[i] == v[i]) {
+ score++;
+ }
+ }
+ if (best_score < score) { // NOTE: strict compare so the earliest candidate wins a tie
+ result = index;
+ best_score = score;
+ }
+ }
+ return result;
+}
+
+function iz
+beamformer_shader_decode_match(BeamformerDataKind a)
+{
+ iz result = beamformer_shader_match((i32 []){(i32)a}, 2, 6, 1); // NOTE: [2, 6) are the Decode entries of beamformer_shader_match_vectors
+ return result;
+}
+
+function iz
+beamformer_shader_filter_match(BeamformerDataKind a, i32 flags)
+{
+ iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 6, 18, 2); // NOTE: [6, 18) are the Filter entries; flags is compared for equality as a whole value
+ return result;
+}
+
+function iz
+beamformer_shader_demodulate_match(BeamformerDataKind a, BeamformerSamplingMode b, i32 flags)
+{
+ iz result = beamformer_shader_match((i32 []){(i32)a, (i32)b, flags}, 18, 42, 3); // NOTE: [18, 42) are the Demodulate entries; some of them store -1 in the sampling mode slot
+ return result;
+}
+
+function iz
+beamformer_shader_das_match(BeamformerDataKind a, i32 flags)
+{
+ iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 42, 50, 2); // NOTE: [42, 50) are the DAS entries; flags is compared for equality as a whole value
+ return result;
+}
+
diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c
@@ -2,6 +2,7 @@
#include "../compiler.h"
#include "../util.h"
+#include "../generated/beamformer.meta.c"
#include "../beamformer_parameters.h"
#include "ogl_beamformer_lib_base.h"
@@ -225,7 +226,7 @@ validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
BF_LIB_ERR_KIND_COMPUTE_STAGE_OVERFLOW))
{
for (u32 i = 0; i < shader_count; i++)
- result &= BETWEEN(shaders[i], 0, BeamformerShaderKind_ComputeCount);
+ result &= BETWEEN(shaders[i], BeamformerShaderKind_ComputeFirst, BeamformerShaderKind_ComputeLast);
if (!result) {
g_beamformer_library_context.last_error = BF_LIB_ERR_KIND_INVALID_COMPUTE_STAGE;
} else if (shaders[0] != BeamformerShaderKind_Demodulate &&
diff --git a/intrinsics.c b/intrinsics.c
@@ -112,6 +112,17 @@ ctz_u32(u32 a)
return result;
}
+function force_inline u64
+ctz_u64(u64 a)
+{
+	/* NOTE: index of the lowest set bit of a, or 64 when a == 0. _BitScanForward64
+	 * writes through an `unsigned long *` (32 bit on MSVC) so index must not be a u64 */
+	unsigned long index = 0;
+	u64 result = 64;
+	if (a && _BitScanForward64(&index, a)) result = index;
+	return result;
+}
+
#else /* !COMPILER_MSVC */
function force_inline u32
@@ -130,6 +141,14 @@ ctz_u32(u32 a)
return result;
}
+function force_inline u64
+ctz_u64(u64 a)
+{
+	/* NOTE: index of the lowest set bit of a; zero never reaches the builtin and gives 64 */
+	u64 result = a ? (u64)__builtin_ctzll(a) : 64;
+	return result;
+}
+
#endif
#if ARCH_ARM64
diff --git a/os_linux.c b/os_linux.c
@@ -223,17 +223,17 @@ os_unload_library(void *h)
function OS_ADD_FILE_WATCH_FN(os_add_file_watch)
{
s8 directory = path;
- directory.len = s8_scan_backwards(path, '/');
- ASSERT(directory.len > 0);
+ directory.len = s8_scan_backwards(path, OS_PATH_SEPARATOR_CHAR);
+ assert(directory.len > 0);
u64 hash = s8_hash(directory);
FileWatchContext *fwctx = &os->file_watch_context;
FileWatchDirectory *dir = lookup_file_watch_directory(fwctx, hash);
if (!dir) {
- ASSERT(path.data[directory.len] == '/');
+ assert(path.data[directory.len] == OS_PATH_SEPARATOR_CHAR);
dir = da_push(a, fwctx);
dir->hash = hash;
- dir->name = push_s8_zero(a, directory);
+ dir->name = push_s8(a, directory);
u32 mask = IN_MOVED_TO|IN_CLOSE_WRITE;
dir->handle = inotify_add_watch((i32)fwctx->handle, (c8 *)dir->name.data, mask);
}
diff --git a/os_win32.c b/os_win32.c
@@ -338,18 +338,18 @@ os_unload_library(void *h)
function OS_ADD_FILE_WATCH_FN(os_add_file_watch)
{
s8 directory = path;
- directory.len = s8_scan_backwards(path, '\\');
- ASSERT(directory.len > 0);
+ directory.len = s8_scan_backwards(path, OS_PATH_SEPARATOR_CHAR);
+ assert(directory.len > 0);
u64 hash = s8_hash(directory);
FileWatchContext *fwctx = &os->file_watch_context;
FileWatchDirectory *dir = lookup_file_watch_directory(fwctx, hash);
if (!dir) {
- ASSERT(path.data[directory.len] == '\\');
+ assert(path.data[directory.len] == OS_PATH_SEPARATOR_CHAR);
dir = da_push(a, fwctx);
dir->hash = hash;
- dir->name = push_s8_zero(a, directory);
+ dir->name = push_s8(a, directory);
dir->handle = CreateFileA((c8 *)dir->name.data, GENERIC_READ, FILE_SHARE_READ, 0,
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS|FILE_FLAG_OVERLAPPED, 0);
diff --git a/shaders/das.glsl b/shaders/das.glsl
@@ -3,7 +3,9 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 {
vec2 rf_data[];
};
-#if DAS_FAST
+const bool sparse = (ShaderFlags & ShaderFlags_Sparse) != 0;
+
+#if (ShaderFlags & ShaderFlags_Fast)
layout(rg32f, binding = 0) restrict uniform image3D u_out_data_tex;
#else
layout(rg32f, binding = 0) writeonly restrict uniform image3D u_out_data_tex;
@@ -102,7 +104,7 @@ float cylindrical_wave_transmit_distance(vec3 point, float focal_depth, float tr
return distance(rca_plane_projection(point, tx_rows), f);
}
-#if DAS_FAST
+#if (ShaderFlags & ShaderFlags_Fast)
vec3 RCA(vec3 world_point)
{
bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0);
@@ -170,10 +172,9 @@ vec3 RCA(vec3 world_point)
}
#endif
-#if DAS_FAST
+#if (ShaderFlags & ShaderFlags_Fast)
vec3 HERCULES(vec3 world_point)
{
- bool uhercules = shader_kind == ShaderKind_UHERCULES;
vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0);
bool rx_cols = bool((shader_flags & ShaderFlags_RxColumns));
@@ -190,8 +191,8 @@ vec3 HERCULES(vec3 world_point)
}
vec2 result = vec2(0);
- for (int transmit = int(uhercules); transmit < acquisition_count; transmit++) {
- int tx_channel = uhercules ? imageLoad(sparse_elements, transmit - int(uhercules)).x : transmit;
+ for (int transmit = int(sparse); transmit < acquisition_count; transmit++) {
+ int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit;
vec3 element_position;
if (rx_cols) element_position = vec3(u_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0);
else element_position = vec3(tx_channel, u_channel, 0) * vec3(xdc_element_pitch, 0);
@@ -211,7 +212,6 @@ vec3 HERCULES(vec3 world_point)
#else
vec3 HERCULES(vec3 world_point)
{
- bool uhercules = shader_kind == ShaderKind_UHERCULES;
vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0);
bool rx_cols = bool((shader_flags & ShaderFlags_RxColumns));
@@ -228,8 +228,8 @@ vec3 HERCULES(vec3 world_point)
}
vec3 result = vec3(0);
- for (int transmit = int(uhercules); transmit < acquisition_count; transmit++) {
- int tx_channel = uhercules ? imageLoad(sparse_elements, transmit - int(uhercules)).x : transmit;
+ for (int transmit = int(sparse); transmit < acquisition_count; transmit++) {
+ int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit;
for (int rx_channel = 0; rx_channel < channel_count; rx_channel++) {
vec3 element_position;
if (rx_cols) element_position = vec3(rx_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0);
@@ -251,10 +251,9 @@ vec3 HERCULES(vec3 world_point)
}
#endif
-#if DAS_FAST
+#if (ShaderFlags & ShaderFlags_Fast)
vec3 FORCES(vec3 world_point)
{
- bool uforces = shader_kind == ShaderKind_UFORCES;
vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
float receive_distance = distance(xdc_world_point.xz, vec2(u_channel * xdc_element_pitch.x, 0));
float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) *
@@ -262,8 +261,8 @@ vec3 FORCES(vec3 world_point)
vec2 result = vec2(0);
if (apodization > 0) {
- for (int transmit = int(uforces); transmit < acquisition_count; transmit++) {
- int tx_channel = uforces ? imageLoad(sparse_elements, transmit - int(uforces)).x : transmit;
+ for (int transmit = int(sparse); transmit < acquisition_count; transmit++) {
+ int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit;
vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(channel_count / 2)), 0);
float sidx = sample_index(distance(xdc_world_point, transmit_center) + receive_distance);
@@ -275,7 +274,6 @@ vec3 FORCES(vec3 world_point)
#else
vec3 FORCES(vec3 world_point)
{
- bool uforces = shader_kind == ShaderKind_UFORCES;
vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
vec3 result = vec3(0);
@@ -284,8 +282,8 @@ vec3 FORCES(vec3 world_point)
float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) *
(xdc_world_point.x - rx_channel * xdc_element_pitch.x));
if (apodization > 0) {
- for (int transmit = int(uforces); transmit < acquisition_count; transmit++) {
- int tx_channel = uforces ? imageLoad(sparse_elements, transmit - int(uforces)).x : transmit;
+ for (int transmit = int(sparse); transmit < acquisition_count; transmit++) {
+ int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit;
vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(channel_count / 2)), 0);
float sidx = sample_index(distance(xdc_world_point, transmit_center) + receive_distance);
@@ -301,7 +299,7 @@ vec3 FORCES(vec3 world_point)
void main()
{
ivec3 out_voxel = ivec3(gl_GlobalInvocationID);
-#if DAS_FAST
+#if (ShaderFlags & ShaderFlags_Fast)
vec3 sum = vec3(imageLoad(u_out_data_tex, out_voxel).xy, 0);
#else
vec3 sum = vec3(0);
diff --git a/shaders/decode.glsl b/shaders/decode.glsl
@@ -8,19 +8,19 @@
* sample-transmit plane with the bound hadamard matrix.
*/
-#if defined(INPUT_DATA_TYPE_FLOAT)
+#if DataKind == DataKind_Float32
#define INPUT_DATA_TYPE float
#define RF_SAMPLES_PER_INDEX 1
#define RESULT_TYPE_CAST(x) vec4((x), 0, 0, 0)
#define SAMPLE_DATA_TYPE float
#define SAMPLE_TYPE_CAST(x) (x)
-#elif defined(INPUT_DATA_TYPE_FLOAT_COMPLEX)
+#elif DataKind == DataKind_Float32Complex
#define INPUT_DATA_TYPE vec2
#define RF_SAMPLES_PER_INDEX 1
#define RESULT_TYPE_CAST(x) vec4((x), 0, 0)
#define SAMPLE_DATA_TYPE vec2
#define SAMPLE_TYPE_CAST(x) (x)
-#elif defined(INPUT_DATA_TYPE_INT16_COMPLEX)
+#elif DataKind == DataKind_Int16Complex
#define INPUT_DATA_TYPE int
#define RF_SAMPLES_PER_INDEX 1
#define RESULT_TYPE_CAST(x) vec4((x), 0, 0)
diff --git a/shaders/filter.glsl b/shaders/filter.glsl
@@ -1,5 +1,5 @@
/* See LICENSE for license details. */
-#if defined(INPUT_DATA_TYPE_FLOAT)
+#if DataKind == DataKind_Float32
#define DATA_TYPE vec2
#define RESULT_TYPE_CAST(v) (v)
#define SAMPLE_TYPE_CAST(v) (v)
@@ -19,7 +19,7 @@ layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping;
-#if COMPLEX_FILTER
+#if (ShaderFlags & ShaderFlags_ComplexFilter)
layout(rg32f, binding = 0) readonly restrict uniform image1D filter_coefficients;
#define apply_filter(iq, h) complex_mul((iq), (h).xy)
#else
@@ -27,6 +27,8 @@ layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping;
#define apply_filter(iq, h) ((iq) * (h).x)
#endif
+const bool map_channels = (ShaderFlags & ShaderFlags_MapChannels) != 0;
+
vec2 complex_mul(vec2 a, vec2 b)
{
mat2 m = mat2(b.x, b.y, -b.y, b.x);
@@ -34,12 +36,12 @@ vec2 complex_mul(vec2 a, vec2 b)
return result;
}
+#if (ShaderFlags & ShaderFlags_Demodulate)
vec2 rotate_iq(vec2 iq, int index)
{
vec2 result;
- /* TODO(rnp): this doesn't give us the same performance boost as hardcoding the mode */
- switch (shader_flags & ShaderFlags_SamplingModeMask) {
- case SamplingMode_NS200BW:{
+ switch (SamplingMode) {
+ case SamplingMode_4X:{
// fs = 2 * fd
// arg = PI * index
// cos -> 1 -1 1 -1
@@ -48,7 +50,7 @@ vec2 rotate_iq(vec2 iq, int index)
if (bool(index & 1)) result = mat2(-1, 0, 0, -1) * iq;
else result = mat2( 1, 0, 0, 1) * iq;
}break;
- case SamplingMode_BS100BW:{
+ case SamplingMode_2X:{
// fs = fd
// arg = 2 * PI * index
// cos -> 1 1 1 1
@@ -64,6 +66,7 @@ vec2 rotate_iq(vec2 iq, int index)
}
return result;
}
+#endif
vec2 sample_rf(uint index)
{
@@ -78,7 +81,6 @@ void main()
uint channel = gl_GlobalInvocationID.y;
uint transmit = gl_GlobalInvocationID.z;
- bool map_channels = bool(shader_flags & ShaderFlags_MapChannels);
uint in_channel = map_channels ? imageLoad(channel_mapping, int(channel)).x : channel;
uint in_offset = input_channel_stride * in_channel + input_transmit_stride * transmit;
uint out_offset = output_channel_stride * channel +
@@ -100,12 +102,12 @@ void main()
int b_length = imageSize(filter_coefficients).x;
int index = int(in_sample);
- const float scale = bool(COMPLEX_FILTER) ? 1 : sqrt(2);
+ const float scale = bool(ShaderFlags & ShaderFlags_ComplexFilter) ? 1 : sqrt(2);
for (int j = max(0, index - b_length); j < min(index, a_length); j++) {
vec2 iq = sample_rf(in_offset + j);
vec4 h = imageLoad(filter_coefficients, index - j);
- #if defined(DEMODULATE)
+ #if (ShaderFlags & ShaderFlags_Demodulate)
result += scale * apply_filter(rotate_iq(iq * vec2(1, -1), -j), h);
#else
result += apply_filter(iq, h);
diff --git a/static.c b/static.c
@@ -74,8 +74,6 @@ debug_init(OS *os, iptr input, Arena *arena)
#endif /* _DEBUG */
-#define static_path_join(a, b) (a OS_PATH_SEPARATOR b)
-
struct gl_debug_ctx {
Stream stream;
iptr os_error_handle;
@@ -172,8 +170,9 @@ dump_gl_params(GLParams *gl, Arena a, OS *os)
function FILE_WATCH_CALLBACK_FN(reload_shader)
{
- ShaderReloadContext *ctx = (typeof(ctx))user_data;
- return beamformer_reload_shader(os, ctx->beamformer_context, ctx, arena, ctx->name);
+ ShaderReloadContext *ctx = (typeof(ctx))user_data; /* NOTE: user_data carries the ShaderReloadContext registered with the file watch */
+ BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + ctx->reloadable_info_index; /* NOTE: index must be valid here; setup_beamformer() stores -1 on the vertex shader link context */
+ return beamformer_reload_shader(os, path, ctx, arena, beamformer_shader_names[rsi->kind]); /* NOTE: the generated shader name is passed as the name argument */
}
function FILE_WATCH_CALLBACK_FN(reload_shader_indirect)
@@ -321,6 +320,9 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
Stream error = stream_alloc(memory, MB(1));
Arena ui_arena = sub_arena(memory, MB(2), KB(4));
+ Arena scratch = {.beg = memory->end - 4096L, .end = memory->end};
+ memory->end = scratch.beg;
+
BeamformerCtx *ctx = *o_ctx = push_struct(memory, typeof(*ctx));
BeamformerInput *input = *o_input = push_struct(memory, typeof(*input));
@@ -330,6 +332,7 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
input->executable_reloaded = 1;
os_init(&ctx->os, memory);
+ ctx->os.path_separator = s8(OS_PATH_SEPARATOR);
ctx->os.compute_worker.arena = compute_arena;
ctx->os.compute_worker.asleep = 1;
ctx->os.upload_worker.arena = upload_arena;
@@ -428,21 +431,23 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
#undef X
};
- #define X(e, f, ...) do if (s8(f).len > 0) { \
- ShaderReloadContext *src = push_struct(memory, typeof(*src)); \
- src->beamformer_context = ctx; \
- src->header = compute_headers[BeamformerShaderKind_##e]; \
- src->path = s8(static_path_join("shaders", f ".glsl")); \
- src->name = src->path; \
- src->shader = cs->programs + BeamformerShaderKind_##e; \
- src->gl_type = GL_COMPUTE_SHADER; \
- src->kind = BeamformerShaderKind_##e; \
- src->link = src; \
- os_add_file_watch(&ctx->os, memory, src->path, reload_shader_indirect, (iptr)src); \
- reload_shader_indirect(&ctx->os, src->path, (iptr)src, *memory); \
- } while (0);
- COMPUTE_SHADERS_INTERNAL
- #undef X
+ for EachElement(beamformer_reloadable_compute_shader_info_indices, it) {
+ i32 index = beamformer_reloadable_compute_shader_info_indices[it];
+ Arena temp = scratch;
+
+ s8 file = push_s8_from_parts(&temp, s8(OS_PATH_SEPARATOR), s8("shaders"),
+ beamformer_reloadable_shader_files[index]);
+
+ BeamformerReloadableShaderInfo *rsi = beamformer_reloadable_shader_infos + index;
+ ShaderReloadContext *src = push_struct(memory, typeof(*src));
+ src->beamformer_context = ctx;
+ src->reloadable_info_index = index;
+ src->link = src;
+ src->header = compute_headers[rsi->kind];
+ src->gl_type = GL_COMPUTE_SHADER;
+ os_add_file_watch(&ctx->os, memory, file, reload_shader_indirect, (iptr)src);
+ reload_shader_indirect(&ctx->os, file, (iptr)src, *memory);
+ }
os_wake_waiters(&worker->sync_variable);
FrameViewRenderContext *fvr = &ctx->frame_view_render_context;
@@ -457,13 +462,16 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
FRAME_VIEW_RENDER_TARGET_SIZE);
+ static_assert(countof(beamformer_reloadable_render_shader_info_indices) == 1,
+ "only a single render shader is currently handled");
+ i32 render_rsi_index = beamformer_reloadable_render_shader_info_indices[0];
+
+ s8 render_file = push_s8_from_parts(&scratch, s8(OS_PATH_SEPARATOR), s8("shaders"),
+ beamformer_reloadable_shader_files[render_rsi_index]);
ShaderReloadContext *render_3d = push_struct(memory, typeof(*render_3d));
- render_3d->beamformer_context = ctx;
- render_3d->path = s8(static_path_join("shaders", "render_3d.frag.glsl"));
- render_3d->name = s8("shaders/render_3d.glsl");
+ render_3d->beamformer_context = ctx;
+ render_3d->reloadable_info_index = render_rsi_index;
render_3d->gl_type = GL_FRAGMENT_SHADER;
- render_3d->kind = BeamformerShaderKind_Render3D;
- render_3d->shader = &fvr->shader;
render_3d->header = s8(""
"layout(location = 0) in vec3 normal;\n"
"layout(location = 1) in vec3 texture_coordinate;\n\n"
@@ -480,6 +488,7 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
"layout(binding = 0) uniform sampler3D u_texture;\n");
render_3d->link = push_struct(memory, typeof(*render_3d));
+ render_3d->link->reloadable_info_index = -1;
render_3d->link->gl_type = GL_VERTEX_SHADER;
render_3d->link->link = render_3d;
render_3d->link->header = s8(""
@@ -507,8 +516,8 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
"\tf_normal = v_normal;\n"
"\tgl_Position = u_projection * u_view * u_model * vec4(pos, 1);\n"
"}\n");
- reload_shader(&ctx->os, render_3d->path, (iptr)render_3d, *memory);
- os_add_file_watch(&ctx->os, memory, render_3d->path, reload_shader, (iptr)render_3d);
+ reload_shader(&ctx->os, render_file, (iptr)render_3d, *memory);
+ os_add_file_watch(&ctx->os, memory, render_file, reload_shader, (iptr)render_3d);
f32 unit_cube_vertices[] = {
0.5f, 0.5f, -0.5f,
@@ -580,6 +589,8 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input
cs->unit_cube_model = render_model_from_arrays(unit_cube_vertices, unit_cube_normals,
sizeof(unit_cube_vertices),
unit_cube_indices, countof(unit_cube_indices));
+
+ memory->end = scratch.end;
}
function void
diff --git a/ui.c b/ui.c
@@ -2644,10 +2644,6 @@ draw_compute_stats_bar_view(BeamformerUI *ui, Arena arena, ComputeShaderStats *s
}
}
- #define X(e, s, pn, ...) [BeamformerShaderKind_##e] = s8_comp(pn ": "),
- read_only local_persist s8 labels[BeamformerShaderKind_ComputeCount] = {COMPUTE_SHADERS_INTERNAL};
- #undef X
-
v2 result = table_extent(table, arena, ts.font);
f32 remaining_width = r.size.w - result.w - table->cell_pad.w;
@@ -2678,8 +2674,8 @@ draw_compute_stats_bar_view(BeamformerUI *ui, Arena arena, ComputeShaderStats *s
DrawRectangleRec(rect.rl, color);
if (point_in_rect(mouse, rect)) {
text_pos = v2_add(rect.pos, (v2){.x = table->cell_pad.w});
- mouse_text = push_compute_time(&arena, labels[stages[i]],
- stats->table.times[frame_index][stages[i]]);
+ s8 name = push_s8_from_parts(&arena, s8(""), beamformer_shader_names[stages[i]], s8(": "));
+ mouse_text = push_compute_time(&arena, name, stats->table.times[frame_index][stages[i]]);
}
rect.pos.x += rect.size.w;
}
@@ -2707,7 +2703,7 @@ push_table_time_row(Table *table, Arena *arena, s8 label, f32 time)
{
assert(table->columns == 3);
TableCell *cells = table_push_row(table, arena, TRK_CELLS)->data;
- cells[0].text = label;
+ cells[0].text = push_s8_from_parts(arena, s8(""), label, s8(":"));
cells[1].text = push_compute_time(arena, s8(""), time);
cells[2].text = s8("[s]");
}
@@ -2772,12 +2768,9 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v
Table *table = table_new(&arena, 2, TextAlignment_Left, TextAlignment_Left, TextAlignment_Left);
switch (csv->kind) {
case ComputeStatsViewKind_Average:{
- #define X(e, n, pn, ...) [BeamformerShaderKind_##e] = s8_comp(pn ":"),
- read_only local_persist s8 labels[BeamformerShaderKind_ComputeCount] = {COMPUTE_SHADERS_INTERNAL};
- #undef X
da_reserve(&arena, table, stages);
for (u32 i = 0; i < stages; i++) {
- push_table_time_row(table, &arena, labels[cp->pipeline.shaders[i]],
+ push_table_time_row(table, &arena, beamformer_shader_names[cp->pipeline.shaders[i]],
stats->average_times[cp->pipeline.shaders[i]]);
}
}break;
diff --git a/util.c b/util.c
@@ -72,11 +72,25 @@ arena_alloc(Arena *a, iz len, uz align, iz count)
return result;
}
-enum { DA_INITIAL_CAP = 4 };
+enum { DA_INITIAL_CAP = 16 };
+
+#define DA_STRUCT(kind, name) typedef struct { /* NOTE: declares the type <name>List holding elements of type kind */ \
+ kind *data; /* NOTE: data/count/capacity are the field names the da_* macros access */ \
+ iz count; \
+ iz capacity; \
+} name ##List;
+
+#define da_index(it, s) ((it) - (s)->data)
#define da_reserve(a, s, n) \
(s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \
_Alignof(typeof(*(s)->data)), sizeof(*(s)->data))
+#define da_append_count(a, s, items, item_count) do { /* NOTE: copies item_count elements from items to the end of s; item_count is evaluated more than once */ \
+ da_reserve((a), (s), (item_count)); \
+ mem_copy((s)->data + (s)->count, (items), sizeof(*(items)) * (uz)(item_count)); /* NOTE: byte count uses the element size of items, not of (s)->data */ \
+ (s)->count += (item_count); \
+} while (0)
+
#define da_push(a, s) \
((s)->count == (s)->capacity \
? da_reserve(a, s, 1), \
@@ -198,7 +212,7 @@ function Stream
stream_alloc(Arena *a, i32 cap)
{
Stream result = {.cap = cap};
- result.data = push_array(a, u8, cap);
+ result.data = arena_commit(a, cap);
return result;
}
@@ -415,6 +429,14 @@ arena_stream_commit_zero(Arena *a, Stream *s)
return result;
}
+function s8
+arena_stream_commit_and_reset(Arena *arena, Stream *s)
+{
+	s8 committed = arena_stream_commit_zero(arena, s); /* NOTE: whatever arena_stream_commit_zero() yields for s is returned */
+	*s = arena_stream(*arena);                         /* NOTE: s is replaced by a stream made from the arena as it is after the commit */
+	return committed;
+}
+
/* NOTE(rnp): FNV-1a hash */
function u64
s8_hash(s8 v)
@@ -504,16 +526,32 @@ s8_to_s16(Arena *a, s8 in)
return result;
}
+#define push_s8_from_parts(a, j, ...) push_s8_from_parts_((a), (j), arg_list(s8, __VA_ARGS__))
function s8
-push_s8(Arena *a, s8 str)
+push_s8_from_parts_(Arena *arena, s8 joiner, s8 *parts, iz count)
{
- s8 result = s8_alloc(a, str.len);
- mem_copy(result.data, str.data, (uz)result.len);
+ iz length = count > 0 ? joiner.len * (count - 1) : 0; /* NOTE: no joiners for an empty list; avoids a negative length */
+ for (iz i = 0; i < count; i++)
+ length += parts[i].len;
+
+ s8 result = {.len = length, .data = arena_commit(arena, length + 1)}; /* NOTE: one extra byte for the terminator, not counted in len */
+
+ iz offset = 0;
+ for (iz i = 0; i < count; i++) {
+ if (i != 0) { /* NOTE: joiner goes between parts only, never before the first */
+ mem_copy(result.data + offset, joiner.data, (uz)joiner.len);
+ offset += joiner.len;
+ }
+ mem_copy(result.data + offset, parts[i].data, (uz)parts[i].len);
+ offset += parts[i].len;
+ }
+ result.data[result.len] = 0; /* NOTE: result is always zero terminated */
+
return result;
}
function s8
-push_s8_zero(Arena *a, s8 str)
+push_s8(Arena *a, s8 str)
{
s8 result = s8_alloc(a, str.len + 1);
result.len -= 1;
diff --git a/util.h b/util.h
@@ -56,8 +56,6 @@
#define asan_unpoison_region(...)
#endif
-#define INVALID_CODE_PATH ASSERT(0)
-#define INVALID_DEFAULT_CASE default: ASSERT(0); break
#define InvalidCodePath assert(0)
#define InvalidDefaultCase default: assert(0); break
@@ -87,8 +85,14 @@
#define SIGN(x) ((x) < 0? -1 : 1)
#define swap(a, b) do {typeof(a) __tmp = (a); (a) = (b); (b) = __tmp;} while(0)
+#define ISDIGIT(c) (BETWEEN((c), '0', '9'))
+#define ISUPPER(c) (((c) & 0x20u) == 0) /* NOTE: only tests that bit 0x20 is clear; does not check that c is a letter */
+#define TOLOWER(c) (((c) | 0x20u)) /* NOTE: sets bit 0x20 unconditionally; only a case conversion for ASCII letters */
+#define TOUPPER(c) (((c) & ~(0x20u))) /* NOTE: clears bit 0x20 unconditionally; only a case conversion for ASCII letters */
+
#define f32_cmp(x, y) (ABS((x) - (y)) <= F32_EPSILON * MAX(1.0f, MAX(ABS(x), ABS(y))))
+#define EachBit(a, it) (u64 it = ctz_u64(a); it != 64; (a) &= ~(1ULL << (it)), it = ctz_u64(a)) /* NOTE: visits set bits low to high and clears them in a; the mask is 64 bit so bits >= 32 work */
#define EachElement(array, it) (u64 it = 0; it < countof(array); it += 1)
#define EachEnumValue(type, it) (type it = (type)0; it < type##_Count; it = (type)(it + 1))
#define EachNonZeroEnumValue(type, it) (type it = (type)1; it < type##_Count; it = (type)(it + 1))
@@ -128,6 +132,7 @@
#define GB(a) ((u64)(a) << 30ULL)
#define I32_MAX (0x7FFFFFFFL)
+#define U16_MAX (0x0000FFFFUL)
#define U32_MAX (0xFFFFFFFFUL)
#define F32_INFINITY (1e+300*1e+300)
#define F32_EPSILON (1e-6f)
@@ -353,6 +358,7 @@ struct OS {
FileWatchContext file_watch_context;
iptr context;
iptr error_handle;
+ s8 path_separator;
GLWorkerThreadContext compute_worker;
GLWorkerThreadContext upload_worker;
diff --git a/util_gl.c b/util_gl.c
@@ -11,7 +11,7 @@ compile_shader(OS *os, Arena a, u32 type, s8 shader, s8 name)
if (res == GL_FALSE) {
Stream buf = arena_stream(a);
- stream_append_s8s(&buf, name, s8(": failed to compile\n"));
+ stream_append_s8s(&buf, s8("\n"), name, s8(": failed to compile\n"));
i32 len = 0, out_len = 0;
glGetShaderiv(sid, GL_INFO_LOG_LENGTH, &len);
@@ -63,12 +63,7 @@ load_shader(OS *os, Arena arena, s8 *shader_texts, u32 *shader_types, i32 count,
if (valid) result = link_program(os, arena, ids, count);
for (i32 i = 0; i < count; i++) glDeleteShader(ids[i]);
- if (result) {
- Stream buf = arena_stream(arena);
- stream_append_s8s(&buf, s8("loaded: "), name, s8("\n"));
- os_write_file(os->error_handle, stream_to_s8(&buf));
- LABEL_GL_OBJECT(GL_PROGRAM, result, name);
- }
+ if (result) glObjectLabel(GL_PROGRAM, result, (i32)name.len, (c8 *)name.data);
return result;
}