Commit: b6d2eb9dcb2c12841a2dc60374e11a7d97ac42fd
Parent: f815e8bb2229b323096765e1d00be10d0d0c57e2
Author: Randy Palamar
Date: Wed, 24 Sep 2025 21:54:02 -0600
meta: move shader bake parameters into meta code
this will likely replace entirely the permutation mechanism which
is not really useful if the shaders are being JIT compiled.
Diffstat:
5 files changed, 276 insertions(+), 124 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -4,7 +4,6 @@
* - loop over all active blocks
- loop over shader sets per block
* - when match found reload it
- * [ ]: refactor: move shader bake parameters into meta code
* [ ]: measure performance of doing channel mapping in a separate shader
* [ ]: BeamformWorkQueue -> BeamformerWorkQueue
* [ ]: need to keep track of gpu memory in some way
@@ -453,13 +452,19 @@ das_ubo_from_beamformer_parameters(BeamformerComputePlan *cp, BeamformerDASUBO *
du->time_offset = bp->time_offset;
du->f_number = bp->f_number;
- cp->das_shader_kind = bp->das_shader_id;
- cp->das_sample_count = bp->sample_count;
- cp->das_channel_count = bp->channel_count;
- cp->das_acquisition_count = bp->acquisition_count;
+ cp->das_bake.shader_kind = bp->das_shader_id;
+ cp->das_bake.sample_count = bp->sample_count;
+ cp->das_bake.channel_count = bp->channel_count;
+ cp->das_bake.acquisition_count = bp->acquisition_count;
- cp->das_shader_flags = 0;
- if (bp->coherency_weighting) cp->das_shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting;
+ cp->das_bake.shader_flags = 0;
+ if (bp->coherency_weighting) cp->das_bake.shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting;
+ else cp->das_bake.shader_flags |= BeamformerShaderDASFlags_Fast;
+
+ if (bp->das_shader_id == BeamformerDASKind_UFORCES || bp->das_shader_id == BeamformerDASKind_UHERCULES)
+ cp->das_bake.shader_flags |= BeamformerShaderDASFlags_Sparse;
+ if (bp->interpolate)
+ cp->das_bake.shader_flags |= BeamformerShaderDASFlags_Interpolate;
}
function void
@@ -539,19 +544,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
commit = 1;
}break;
case BeamformerShaderKind_DAS:{
- BeamformerDataKind das_data_kind = BeamformerDataKind_Float32;
+ cp->das_bake.data_kind = BeamformerDataKind_Float32;
if (demodulate || run_cuda_hilbert)
- das_data_kind = BeamformerDataKind_Float32Complex;
-
- i32 local_flags = 0;
- if ((cp->das_shader_flags & BeamformerShaderDASFlags_CoherencyWeighting) == 0)
- local_flags |= BeamformerShaderDASFlags_Fast;
- if (cp->das_shader_kind == BeamformerDASKind_UFORCES || cp->das_shader_kind == BeamformerDASKind_UHERCULES)
- local_flags |= BeamformerShaderDASFlags_Sparse;
- if (pb->parameters.interpolate)
- local_flags |= BeamformerShaderDASFlags_Interpolate;
-
- match = beamformer_shader_das_match(das_data_kind, local_flags);
+ cp->das_bake.data_kind = BeamformerDataKind_Float32Complex;
commit = 1;
}break;
default:{
@@ -570,8 +565,8 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
cp->pipeline.data_kind = data_kind;
u32 das_sample_stride = 1;
- u32 das_transmit_stride = cp->das_sample_count;
- u32 das_channel_stride = cp->das_acquisition_count * cp->das_sample_count;
+ u32 das_transmit_stride = cp->das_bake.sample_count;
+ u32 das_channel_stride = cp->das_bake.acquisition_count * cp->das_bake.sample_count;
u32 decimation_rate = MAX(pb->parameters.decimation_rate, 1);
if (demodulate) {
@@ -580,14 +575,14 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
}
u32 input_sample_stride = 1;
- u32 input_transmit_stride = cp->das_sample_count;
+ u32 input_transmit_stride = cp->das_bake.sample_count;
u32 input_channel_stride = pb->parameters.raw_data_dimensions[0];
BeamformerDecodeUBO *dp = &cp->decode_ubo_data;
dp->decode_mode = pb->parameters.decode;
- dp->transmit_count = cp->das_acquisition_count;
+ dp->transmit_count = cp->das_bake.acquisition_count;
- dp->input_sample_stride = decode_first? input_sample_stride : cp->das_acquisition_count;
+ dp->input_sample_stride = decode_first? input_sample_stride : cp->das_bake.acquisition_count;
dp->input_channel_stride = decode_first? input_channel_stride : das_channel_stride;
dp->input_transmit_stride = decode_first? input_transmit_stride : 1;
dp->output_sample_stride = das_sample_stride;
@@ -598,9 +593,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
dp->output_transmit_stride *= decimation_rate;
}
- cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_sample_count / DECODE_LOCAL_SIZE_X);
- cp->decode_dispatch.y = (u32)ceil_f32((f32)cp->das_channel_count / DECODE_LOCAL_SIZE_Y);
- cp->decode_dispatch.z = (u32)ceil_f32((f32)cp->das_acquisition_count / DECODE_LOCAL_SIZE_Z);
+ cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / DECODE_LOCAL_SIZE_X);
+ cp->decode_dispatch.y = (u32)ceil_f32((f32)cp->das_bake.channel_count / DECODE_LOCAL_SIZE_Y);
+ cp->decode_dispatch.z = (u32)ceil_f32((f32)cp->das_bake.acquisition_count / DECODE_LOCAL_SIZE_Z);
/* NOTE(rnp): decode 2 samples per dispatch when data is i16 */
if (decode_first && data_kind == BeamformerDataKind_Int16)
@@ -621,8 +616,8 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
mp->sampling_frequency = bp->sampling_frequency / 2;
mp->decimation_rate = decimation_rate;
- bp->sampling_frequency /= 2 * (f32)mp->decimation_rate;
- cp->das_sample_count /= 2 * mp->decimation_rate;
+ bp->sampling_frequency /= 2 * (f32)mp->decimation_rate;
+ cp->das_bake.sample_count /= 2 * mp->decimation_rate;
if (decode_first) {
mp->input_channel_stride = dp->output_channel_stride;
@@ -642,16 +637,16 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
mp->output_sample_stride = dp->input_sample_stride;
mp->output_transmit_stride = dp->input_transmit_stride;
- cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_sample_count / DECODE_LOCAL_SIZE_X);
+ cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / DECODE_LOCAL_SIZE_X);
}
}
/* TODO(rnp): filter may need a different dispatch layout */
- cp->demod_dispatch.x = (u32)ceil_f32((f32)cp->das_sample_count / FILTER_LOCAL_SIZE_X);
- cp->demod_dispatch.y = (u32)ceil_f32((f32)cp->das_channel_count / FILTER_LOCAL_SIZE_Y);
- cp->demod_dispatch.z = (u32)ceil_f32((f32)cp->das_acquisition_count / FILTER_LOCAL_SIZE_Z);
+ cp->demod_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / FILTER_LOCAL_SIZE_X);
+ cp->demod_dispatch.y = (u32)ceil_f32((f32)cp->das_bake.channel_count / FILTER_LOCAL_SIZE_Y);
+ cp->demod_dispatch.z = (u32)ceil_f32((f32)cp->das_bake.acquisition_count / FILTER_LOCAL_SIZE_Z);
- cp->rf_size = cp->das_sample_count * cp->das_channel_count * cp->das_acquisition_count;
+ cp->rf_size = cp->das_bake.sample_count * cp->das_bake.channel_count * cp->das_bake.acquisition_count;
if (demodulate || run_cuda_hilbert) cp->rf_size *= 8;
else cp->rf_size *= 4;
@@ -660,12 +655,12 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
flt->demodulation_frequency = bp->demodulation_frequency;
flt->sampling_frequency = bp->sampling_frequency;
flt->decimation_rate = 1;
- flt->output_channel_stride = cp->das_sample_count * cp->das_acquisition_count;
+ flt->output_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count;
flt->output_sample_stride = 1;
- flt->output_transmit_stride = cp->das_sample_count;
- flt->input_channel_stride = cp->das_sample_count * cp->das_acquisition_count;
+ flt->output_transmit_stride = cp->das_bake.sample_count;
+ flt->input_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count;
flt->input_sample_stride = 1;
- flt->input_transmit_stride = cp->das_sample_count;
+ flt->input_transmit_stride = cp->das_bake.sample_count;
}
function void
@@ -749,11 +744,11 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl
Stream shader_stream = arena_stream(arena);
stream_push_shader_header(&shader_stream, base_shader, compute_headers[base_shader]);
- stream_append_s8(&shader_stream, beamformer_shader_local_header_strings[reloadable_index]);
-
i32 *header_indices = beamformer_shader_header_vectors[sd - beamformer_shader_descriptors];
for (i32 index = 0; index < sd->header_vector_length; index++)
- stream_append_s8s(&shader_stream, beamformer_shader_global_header_strings[header_indices[index]], s8("\n"));
+ stream_append_s8(&shader_stream, beamformer_shader_global_header_strings[header_indices[index]]);
+
+ stream_append_s8(&shader_stream, beamformer_shader_local_header_strings[reloadable_index]);
i32 *match_vector = beamformer_shader_match_vectors[cp->shader_matches[shader_slot]];
for (i32 index = 0; index < sd->match_vector_length; index++) {
@@ -765,25 +760,26 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl
if (sd->has_local_flags) {
stream_append_s8(&shader_stream, s8("#define ShaderFlags (0x"));
i32 flags = match_vector[sd->match_vector_length];
- if (shader == BeamformerShaderKind_DAS) flags |= cp->das_shader_flags;
stream_append_hex_u64(&shader_stream, (u64)flags);
stream_append_s8(&shader_stream, s8(")\n"));
}
- /* TODO(rnp): generate this */
- switch (base_shader) {
- case BeamformerShaderKind_DAS:{
- stream_append_s8(&shader_stream, s8("#define ShaderKind "));
- stream_append_u64(&shader_stream, cp->das_shader_kind);
- stream_append_s8(&shader_stream, s8("\n#define SampleCount "));
- stream_append_u64(&shader_stream, cp->das_sample_count);
- stream_append_s8(&shader_stream, s8("\n#define ChannelCount "));
- stream_append_u64(&shader_stream, cp->das_channel_count);
- stream_append_s8(&shader_stream, s8("\n#define AcquisitionCount "));
- stream_append_u64(&shader_stream, cp->das_acquisition_count);
- stream_append_s8(&shader_stream, s8("\n"));
- }break;
- default:{}break;
+ if (beamformer_shader_bake_parameter_name_counts[reloadable_index]) {
+ i32 count = beamformer_shader_bake_parameter_name_counts[reloadable_index];
+ u32 *parameters = 0;
+ /* TODO(rnp): generate this */
+ switch (base_shader) {
+ case BeamformerShaderKind_DAS:{ parameters = cp->das_bake.E; }break;
+ default:{}break;
+ }
+
+ assert(parameters);
+ s8 *names = beamformer_shader_bake_parameter_names[reloadable_index];
+ for (i32 index = 0; index < count; index++) {
+ stream_append_s8s(&shader_stream, s8("#define "), names[index], s8(" (0x"));
+ stream_append_hex_u64(&shader_stream, parameters[index]);
+ stream_append_s8(&shader_stream, s8(")\n"));
+ }
}
stream_append_s8(&shader_stream, s8("\n#line 1\n"));
@@ -835,8 +831,8 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp,
if (ctx->compute_context.ping_pong_ssbo_size < decoded_data_size)
alloc_shader_storage(ctx, decoded_data_size, arena);
- if (cp->hadamard_order != (i32)cp->das_acquisition_count)
- update_hadamard_texture(cp, (i32)cp->das_acquisition_count, arena);
+ if (cp->hadamard_order != (i32)cp->das_bake.acquisition_count)
+ update_hadamard_texture(cp, (i32)cp->das_bake.acquisition_count, arena);
cp->min_coordinate = v3_from_f32_array(pb->parameters.output_min_coordinate);
cp->max_coordinate = v3_from_f32_array(pb->parameters.output_max_coordinate);
@@ -969,7 +965,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
case BeamformerShaderKind_DAS:{
local_persist u32 das_cycle_t = 0;
- i32 local_flags = match_vector[shader_descriptor->match_vector_length];
+ u32 local_flags = cp->das_bake.shader_flags;
b32 fast = (local_flags & BeamformerShaderDASFlags_Fast) != 0;
b32 sparse = (local_flags & BeamformerShaderDASFlags_Sparse) != 0;
@@ -994,14 +990,14 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
if (fast) {
i32 loop_end;
- if (cp->das_shader_kind == BeamformerDASKind_RCA_VLS ||
- cp->das_shader_kind == BeamformerDASKind_RCA_TPW)
+ if (cp->das_bake.shader_kind == BeamformerDASKind_RCA_VLS ||
+ cp->das_bake.shader_kind == BeamformerDASKind_RCA_TPW)
{
/* NOTE(rnp): to avoid repeatedly sampling the whole focal vectors
* texture we loop over transmits for VLS/TPW */
- loop_end = (i32)cp->das_acquisition_count;
+ loop_end = (i32)cp->das_bake.acquisition_count;
} else {
- loop_end = (i32)cp->das_channel_count;
+ loop_end = (i32)cp->das_bake.channel_count;
}
f32 percent_per_step = 1.0f / (f32)loop_end;
cc->processing_progress = -percent_per_step;
@@ -1217,8 +1213,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
frame->min_coordinate = cp->min_coordinate;
frame->max_coordinate = cp->max_coordinate;
- frame->das_kind = cp->das_shader_kind;
- frame->compound_count = cp->das_acquisition_count;
+ frame->das_kind = cp->das_bake.shader_kind;
+ frame->compound_count = cp->das_bake.acquisition_count;
BeamformerComputeContext *cc = &ctx->compute_context;
BeamformerComputePipeline *pipeline = &cp->pipeline;
diff --git a/beamformer.h b/beamformer.h
@@ -224,11 +224,7 @@ struct BeamformerComputePlan {
BEAMFORMER_COMPUTE_UBO_LIST
#undef X
- u32 das_shader_kind;
- u32 das_sample_count;
- u32 das_channel_count;
- u32 das_acquisition_count;
- i32 das_shader_flags;
+ BeamformerShaderDASBakeParameters das_bake;
BeamformerComputePlan *next;
};
diff --git a/beamformer.meta b/beamformer.meta
@@ -38,14 +38,19 @@
@Shader(das.glsl) DAS
{
- @Permute(DataKind [Float32 Float32Complex])
- {
- @PermuteFlags([Fast Sparse Interpolate])
- }
-
+ @Enumeration(DataKind)
@Enumeration(RCAOrientation)
+ @Flags([Fast Sparse Interpolate CoherencyWeighting])
- @Flags([CoherencyWeighting])
+ @Bake
+ {
+ @BakeVariable(AcquisitionCount acquisition_count)
+ @BakeVariable(ChannelCount channel_count )
+ @BakeVariable(DataKind data_kind )
+ @BakeVariable(SampleCount sample_count )
+ @BakeVariable(ShaderFlags shader_flags )
+ @BakeVariable(ShaderKind shader_kind )
+ }
}
@Shader(min_max.glsl) MinMax
diff --git a/build.c b/build.c
@@ -2,6 +2,7 @@
/* NOTE: inspired by nob: https://github.com/tsoding/nob.h */
/* TODO(rnp):
+ * [ ]: refactor: helper for appending expanded shader flags
* [ ]: refactor: "base" shaders should only be reloadable shaders
* - internally when a shader with no file is encountered it should
* not get pushed as a "base" shader.
@@ -819,6 +820,8 @@ meta_end_and_write_matlab(MetaprogramContext *m, char *path)
#define META_ENTRY_KIND_LIST \
X(Invalid) \
+ X(Bake) \
+ X(BakeVariable) \
X(BeginScope) \
X(EndScope) \
X(Enumeration) \
@@ -935,6 +938,13 @@ global jmp_buf compiler_jmp_buf;
meta_error(); \
} while(0)
+/* NOTE: reports an error on entry `e` through meta_entry_error() with the message
+ * "<prefix>@<kind of e>() in @<base_kind>()". `prefix` is pasted directly onto the
+ * format string so it must be a string literal. */
+#define meta_entry_pair_error(e, prefix, base_kind) \
+ meta_entry_error(e, prefix"@%s() in @%s()\n", \
+ meta_entry_kind_strings[(e)->kind], \
+ meta_entry_kind_strings[(base_kind)])
+
+/* NOTE: meta_entry_pair_error() specialized with the "invalid nesting: " prefix; used
+ * when entry `e` is not allowed inside an entry of kind `base_kind` */
+#define meta_entry_nesting_error(e, base_kind) meta_entry_pair_error(e, "invalid nesting: ", base_kind)
+
#define meta_entry_error_location(e, loc, ...) do { \
meta_compiler_error_message((loc), __VA_ARGS__); \
meta_entry_print((e), 1, (i32)(loc).column); \
@@ -1160,6 +1170,29 @@ meta_parser_arguments(MetaParser *p, MetaEntry *e, Arena *arena)
}
}
+/* NOTE: measures the scope attached to `base`.
+ *
+ * base:        a regular entry (not a scope delimiter itself)
+ * entry_count: number of entries available starting at (and including) `base`
+ *
+ * Returns the offset from `base` to the EndScope entry which closes the scope
+ * opened directly after `base`, or 1 when `base` is not directly followed by a
+ * BeginScope. If the scope is still open after `entry_count` entries an error
+ * is reported on `base`. */
+function iz
+meta_entry_extract_scope(MetaEntry *base, iz entry_count)
+{
+	/* NOTE: both conditions must hold; base may not be a scope delimiter */
+	assert(base->kind != MetaEntryKind_BeginScope && base->kind != MetaEntryKind_EndScope);
+	assert(entry_count > 0);
+
+	iz result = 1;
+	/* NOTE: only a BeginScope directly after base opens a scope for base. Anything
+	 * else, including the EndScope of the enclosing scope, means base has no scope
+	 * and must not be counted towards the nesting depth. */
+	if (entry_count > 1 && base[1].kind == MetaEntryKind_BeginScope) {
+		iz depth = 0;
+		for (MetaEntry *e = base + 1; result < entry_count; result++, e++) {
+			switch (e->kind) {
+			case MetaEntryKind_BeginScope:{ depth++; }break;
+			case MetaEntryKind_EndScope:{   depth--; }break;
+			default:{}break;
+			}
+			/* NOTE: depth returns to 0 exactly on the matching EndScope */
+			if (depth == 0) break;
+		}
+
+		if (depth != 0)
+			meta_entry_error(base, "unclosed scope for entry\n");
+	}
+
+	return result;
+}
+
function MetaEntryStack
meta_entry_stack_from_file(Arena *arena, Arena scratch, char *file)
{
@@ -1245,6 +1278,14 @@ typedef struct {
} s8_list_table;
+/* NOTE: the @BakeVariable rows collected from one @Bake entry of a shader */
typedef struct {
+ /* first @BakeVariable argument of each row; emitted as the entries of
+  * beamformer_shader_bake_parameter_names */
+ s8 *names_upper;
+ /* second @BakeVariable argument of each row; emitted as the u32 member
+  * names of the generated Beamformer<Shader>BakeParameters union */
+ s8 *names_lower;
+ /* number of rows stored in names_upper/names_lower */
+ u32 entry_count;
+ /* index of the owning shader in MetaContext::shaders */
+ u32 shader_id;
+} MetaShaderBakeParameters;
+DA_STRUCT(MetaShaderBakeParameters, MetaShaderBakeParameters);
+
+typedef struct {
iz kind;
iz variation;
} MetaEnumeration;
@@ -1266,8 +1307,11 @@ typedef struct {
MetaShaderPermutationList permutations;
MetaIDList global_flag_ids;
MetaIDList global_enumeration_ids;
+ MetaShaderBakeParameters *bake_parameters;
u32 base_name_id;
u32 flag_list_id;
+ /* TODO(rnp): temporary: remove when all flags are baked */
+ b32 flags_permuted;
} MetaShader;
DA_STRUCT(MetaShader, MetaShader);
@@ -1282,7 +1326,6 @@ typedef struct {
i32 first_match_vector_index;
i32 one_past_last_match_vector_index;
i32 sub_field_count;
- b32 has_local_flags;
} MetaShaderDescriptor;
typedef struct {
@@ -1299,15 +1342,15 @@ typedef struct {
s8_list_table flags_for_shader;
- MetaShaderGroupList shader_groups;
- MetaShaderList shaders;
- MetaBaseShaderList base_shaders;
- s8_list shader_names;
+ MetaShaderBakeParametersList shader_bake_parameters;
+ MetaShaderGroupList shader_groups;
+ MetaShaderList shaders;
+ MetaBaseShaderList base_shaders;
+ s8_list shader_names;
MetaShaderDescriptor *shader_descriptors;
} MetaContext;
-
function u32
metagen_pack_permutation(MetaContext *ctx, MetaEnumeration e)
{
@@ -1393,6 +1436,43 @@ meta_intern_id(MetaContext *ctx, MetaIDList *v, u32 id)
}
+/* NOTE: packs an @Bake entry and the @BakeVariable rows inside its scope into a new
+ * MetaShaderBakeParameters pushed onto ctx->shader_bake_parameters.
+ *
+ * e:           the @Bake entry
+ * entry_count: number of entries available starting at `e`
+ * shader_id:   stored in the new table entry
+ * table_id:    optional; receives the index of the new table entry
+ *
+ * Returns the value of meta_entry_extract_scope() for `e`. Any row in the scope
+ * which is not an @BakeVariable is reported as a nesting error. */
function iz
+meta_pack_shader_bake_parameters(MetaContext *ctx, MetaEntry *e, iz entry_count, u32 shader_id, u32 *table_id)
+{
+	assert(e->kind == MetaEntryKind_Bake);
+	iz scope_end = meta_entry_extract_scope(e, entry_count);
+
+	MetaShaderBakeParameters *bake = da_push(ctx->arena, &ctx->shader_bake_parameters);
+	bake->shader_id = shader_id;
+	if (table_id) *table_id = (u32)da_index(bake, &ctx->shader_bake_parameters);
+
+	/* NOTE: @Bake itself takes no arguments */
+	if (e->argument_count) meta_entry_argument_expected(e);
+
+	if (scope_end > 1) {
+		assert(e[1].kind == MetaEntryKind_BeginScope);
+		assert(e[scope_end].kind == MetaEntryKind_EndScope);
+
+		/* NOTE: first pass: validate every row between the scope delimiters and
+		 * count them so that the name arrays can be sized */
+		for (iz it = 2; it != scope_end; it++) {
+			MetaEntry *row = e + it;
+			if (row->kind != MetaEntryKind_BakeVariable)
+				meta_entry_nesting_error(row, MetaEntryKind_Bake);
+			meta_entry_argument_expected(row, s8("name"), s8("name_lower"));
+			bake->entry_count++;
+		}
+
+		bake->names_upper = push_array(ctx->arena, s8, bake->entry_count);
+		bake->names_lower = push_array(ctx->arena, s8, bake->entry_count);
+
+		/* NOTE: second pass: copy out both names of each row */
+		for (iz it = 2; it != scope_end; it++) {
+			MetaEntryArgument *arguments = e[it].arguments;
+			u32 slot = (u32)(it - 2);
+			bake->names_upper[slot] = arguments[0].string;
+			bake->names_lower[slot] = arguments[1].string;
+		}
+	}
+
+	return scope_end;
+}
+
+function iz
meta_enumeration_id(MetaContext *ctx, s8 kind)
{
iz result = meta_intern_string(ctx, &ctx->enumeration_kinds, kind);
@@ -1459,6 +1539,7 @@ meta_pack_shader_permutation(MetaContext *ctx, MetaShaderPermutation *sp, MetaSh
u32 cursor = f->cursor.current;
switch (e->kind) {
case MetaEntryKind_PermuteFlags:{
+ base_shader->flags_permuted |= 1;
if (f->permutation_id == U32_MAX) {
u32 test = cursor, packed = 0;
for EachBit(test, flag) {
@@ -1486,6 +1567,7 @@ meta_pack_shader_permutation(MetaContext *ctx, MetaShaderPermutation *sp, MetaSh
MetaEntryArgument *a = last->arguments;
switch (last->kind) {
case MetaEntryKind_PermuteFlags:{
+ base_shader->flags_permuted |= 1;
u32 packed = 0, test = frame_cursor;
for EachBit(test, flag) {
u32 flag_index = meta_commit_shader_flag(ctx, base_shader->flag_list_id, a->strings[flag], last);
@@ -1671,13 +1753,21 @@ meta_pack_shader(MetaContext *ctx, MetaShaderGroup *sg, Arena scratch, MetaEntry
for (u32 index = 0; index < flags.count; index++)
meta_commit_shader_flag(ctx, s->flag_list_id, flags.strings[index], e);
}break;
+ case MetaEntryKind_Bake:{
+ if (s->bake_parameters) {
+ meta_entry_error(e, "invalid @%s in @%s: only one @%s allowed per @%s\n",
+ meta_entry_kind_strings[e->kind], meta_entry_kind_strings[MetaEntryKind_Shader],
+ meta_entry_kind_strings[e->kind], meta_entry_kind_strings[MetaEntryKind_Shader]);
+ }
+ u32 table_id;
+ result += meta_pack_shader_bake_parameters(ctx, e, entry_count - result, (u32)da_index(s, &ctx->shaders), &table_id);
+ s->bake_parameters = ctx->shader_bake_parameters.data + table_id;
+ }break;
default:
error:
{
- meta_entry_error(e, "invalid nested @%s() in @%s()\n",
- meta_entry_kind_strings[e->kind],
- meta_entry_kind_strings[MetaEntryKind_Shader]);
+ meta_entry_nesting_error(e, MetaEntryKind_Shader);
}break;
}
if (stack.count == 0)
@@ -1761,12 +1851,12 @@ metagen_push_c_flag_enum(MetaprogramContext *m, Arena scratch, s8 kind, s8 *ids,
function void
metagen_push_shader_derivative_vectors(MetaContext *ctx, MetaprogramContext *m, MetaShader *s,
- i32 sub_field_count, b32 has_local_flags)
+ i32 sub_field_count)
{
meta_push_line(m, s8("// "), ctx->shader_names.data[s->base_name_id]);
for (iz perm = 0; perm < s->permutations.count; perm++) {
MetaShaderPermutation *p = s->permutations.data + perm;
- if (!has_local_flags && sub_field_count == 0) {
+ if (!s->flags_permuted && sub_field_count == 0) {
meta_push_line(m, s8("0,"));
} else {
meta_begin_line(m, s8("(i32 []){"));
@@ -1780,7 +1870,7 @@ metagen_push_shader_derivative_vectors(MetaContext *ctx, MetaprogramContext *m,
for (i32 id = p->global_flags_count; id < sub_field_count; id++)
meta_push(m, s8(", -1"));
- if (has_local_flags) {
+ if (s->flags_permuted) {
meta_push(m, s8(", 0x"));
meta_push_u64_hex(m, p->local_flags);
}
@@ -1818,7 +1908,7 @@ meta_push_shader_descriptors_table(MetaprogramContext *m, MetaContext *ctx)
stream_append_byte(&sb, ',');
columns[3][shader] = arena_stream_commit_and_reset(&m->scratch, &sb);
- columns[4][shader] = sd->has_local_flags ? s8("1") : s8 ("0");
+ columns[4][shader] = s->flags_permuted ? s8("1") : s8 ("0");
}
meta_begin_scope(m, s8("read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {"));
@@ -1949,7 +2039,7 @@ meta_push_shader_match_helper(MetaprogramContext *m, MetaContext *ctx, MetaShade
meta_push(m, s8("Beamformer"), ctx->enumeration_kinds.data[index], s8(" "));
stream_append_byte(&m->stream, (u8)((iz)'a' + flag));
}
- if (sd->has_local_flags) {
+ if (s->flags_permuted) {
if (s->global_flag_ids.count) meta_push(m, s8(", "));
meta_push(m, s8("i32 flags"));
}
@@ -1961,7 +2051,7 @@ meta_push_shader_match_helper(MetaprogramContext *m, MetaContext *ctx, MetaShade
if (flag != 0) meta_push(m, s8(", (i32)"));
stream_append_byte(&m->stream, (u8)((iz)'a' + flag));
}
- if (sd->has_local_flags) {
+ if (s->flags_permuted) {
if (s->global_flag_ids.count) meta_push(m, s8(", "));
meta_push(m, s8("flags"));
}
@@ -1970,7 +2060,7 @@ meta_push_shader_match_helper(MetaprogramContext *m, MetaContext *ctx, MetaShade
meta_push(m, s8(", "));
meta_push_u64(m, (u64)sd->one_past_last_match_vector_index);
meta_push(m, s8(", "));
- meta_push_u64(m, (u64)sd->sub_field_count + sd->has_local_flags);
+ meta_push_u64(m, (u64)sd->sub_field_count + s->flags_permuted);
meta_end_line(m, s8(");"));
meta_push_line(m, s8("return result;"));
meta_end_scope(m, s8("}\n"));
@@ -2065,6 +2155,24 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena)
metagen_push_c_struct(m, name, types, countof(types), names, countof(names));
}
+ for (u32 bake = 0; bake < ctx->shader_bake_parameters.count; bake++) {
+ Arena tmp = m->scratch;
+ MetaShaderBakeParameters *b = ctx->shader_bake_parameters.data + bake;
+ MetaShader *s = ctx->shaders.data + b->shader_id;
+ s8 name = push_s8_from_parts(&m->scratch, s8(""), s8("BeamformerShader"),
+ ctx->shader_names.data[s->base_name_id], s8("BakeParameters"));
+ meta_begin_scope(m, s8("typedef union {"));
+ meta_begin_scope(m, s8("struct {"));
+ for (u32 entry = 0; entry < b->entry_count; entry++)
+ meta_push_line(m, s8("u32 "), b->names_lower[entry], s8(";"));
+ meta_end_scope(m, s8("};"));
+ meta_begin_line(m, s8("u32 E["));
+ meta_push_u64(m, b->entry_count);
+ meta_end_line(m, s8("];"));
+ meta_end_scope(m, s8("} "), name, s8(";\n"));
+ m->scratch = tmp;
+ }
+
///////////////////////////////////////
// NOTE(rnp): shader descriptor tables
i32 match_vectors_count = 0;
@@ -2072,7 +2180,7 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena)
for (iz shader = 0; shader < ctx->shaders.count; shader++) {
MetaShader *s = ctx->shaders.data + shader;
MetaShaderDescriptor *sd = ctx->shader_descriptors + shader;
- metagen_push_shader_derivative_vectors(ctx, m, s, sd->sub_field_count, sd->has_local_flags);
+ metagen_push_shader_derivative_vectors(ctx, m, s, sd->sub_field_count);
match_vectors_count += (i32)s->permutations.count;
}
meta_end_scope(m, s8("};"));
@@ -2095,7 +2203,7 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena)
for (iz shader = 0; shader < ctx->shaders.count; shader++) {
MetaShader *s = ctx->shaders.data + shader;
- if (s->global_flag_ids.count) {
+ if (s->global_flag_ids.count || s->global_enumeration_ids.count) {
meta_begin_line(m, s8("(i32 []){"));
for (iz id = 0; id < s->global_flag_ids.count; id++) {
if (id != 0) meta_push(m, s8(", "));
@@ -2112,6 +2220,39 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena)
}
meta_end_scope(m, s8("};\n"));
+ meta_begin_scope(m, s8("read_only global s8 *beamformer_shader_bake_parameter_names[] = {"));
+ for (iz shader = 0; shader < ctx->base_shaders.count; shader++) {
+ MetaBaseShader *bs = ctx->base_shaders.data + shader;
+ MetaShader *s = bs->shader;
+ if (bs->file.len) {
+ if (s->bake_parameters) {
+ meta_begin_scope(m, s8("(s8 []){"));
+ for (u32 index = 0; index < s->bake_parameters->entry_count; index++)
+ meta_push_line(m, s8("s8_comp(\""), s->bake_parameters->names_upper[index], s8("\"),"));
+ meta_end_scope(m, s8("},"));
+ } else {
+ meta_push_line(m, s8("0,"));
+ }
+ }
+ }
+ meta_end_scope(m, s8("};\n"));
+
+ meta_begin_scope(m, s8("read_only global i32 beamformer_shader_bake_parameter_name_counts[] = {"));
+ for (iz shader = 0; shader < ctx->base_shaders.count; shader++) {
+ MetaBaseShader *bs = ctx->base_shaders.data + shader;
+ MetaShader *s = bs->shader;
+ if (bs->file.len) {
+ if (s->bake_parameters) {
+ meta_indent(m);
+ meta_push_u64(m, s->bake_parameters->entry_count);
+ meta_end_line(m, s8(","));
+ } else {
+ meta_push_line(m, s8("0,"));
+ }
+ }
+ }
+ meta_end_scope(m, s8("};\n"));
+
//////////////////////////////////////
// NOTE(rnp): shader matching helpers
meta_push_line(m, s8("function iz"));
@@ -2139,7 +2280,7 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena)
for (iz shader = 0; shader < ctx->shaders.count; shader++) {
MetaShader *s = ctx->shaders.data + shader;
MetaShaderDescriptor *sd = ctx->shader_descriptors + shader;
- if (sd->sub_field_count || sd->has_local_flags)
+ if (sd->sub_field_count || s->flags_permuted)
meta_push_shader_match_helper(m, ctx, s, sd);
}
@@ -2414,7 +2555,6 @@ metagen_load_context(Arena *arena)
MetaShader *s = ctx->shaders.data + shader;
MetaShaderDescriptor *sd = ctx->shader_descriptors + shader;
- sd->has_local_flags = ctx->flags_for_shader.data[s->flag_list_id].count > 0;
sd->sub_field_count = (i32)s->global_flag_ids.count;
sd->first_match_vector_index = match_vectors_count;
match_vectors_count += (i32)s->permutations.count;
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -73,6 +73,18 @@ typedef struct {
b32 has_local_flags;
} BeamformerShaderDescriptor;
+typedef union {
+ struct {
+ u32 acquisition_count;
+ u32 channel_count;
+ u32 data_kind;
+ u32 sample_count;
+ u32 shader_flags;
+ u32 shader_kind;
+ };
+ u32 E[6];
+} BeamformerShaderDASBakeParameters;
+
read_only global i32 *beamformer_shader_match_vectors[] = {
// CudaDecode
0,
@@ -123,22 +135,7 @@ read_only global i32 *beamformer_shader_match_vectors[] = {
(i32 []){BeamformerDataKind_Float32, -1, 0x06},
(i32 []){BeamformerDataKind_Float32, -1, 0x07},
// DAS
- (i32 []){BeamformerDataKind_Float32, 0x00},
- (i32 []){BeamformerDataKind_Float32, 0x01},
- (i32 []){BeamformerDataKind_Float32, 0x02},
- (i32 []){BeamformerDataKind_Float32, 0x03},
- (i32 []){BeamformerDataKind_Float32, 0x04},
- (i32 []){BeamformerDataKind_Float32, 0x05},
- (i32 []){BeamformerDataKind_Float32, 0x06},
- (i32 []){BeamformerDataKind_Float32, 0x07},
- (i32 []){BeamformerDataKind_Float32Complex, 0x00},
- (i32 []){BeamformerDataKind_Float32Complex, 0x01},
- (i32 []){BeamformerDataKind_Float32Complex, 0x02},
- (i32 []){BeamformerDataKind_Float32Complex, 0x03},
- (i32 []){BeamformerDataKind_Float32Complex, 0x04},
- (i32 []){BeamformerDataKind_Float32Complex, 0x05},
- (i32 []){BeamformerDataKind_Float32Complex, 0x06},
- (i32 []){BeamformerDataKind_Float32Complex, 0x07},
+ 0,
// MinMax
0,
// Sum
@@ -146,7 +143,7 @@ read_only global i32 *beamformer_shader_match_vectors[] = {
// Render3D
0,
};
-#define beamformer_match_vectors_count (62)
+#define beamformer_match_vectors_count (47)
read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {
{0, 1, 0, 0, 0},
@@ -154,10 +151,10 @@ read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {
{2, 7, 1, 2, 1},
{7, 19, 1, 1, 1},
{19, 43, 2, 2, 1},
- {43, 59, 1, 2, 1},
- {59, 60, 0, 0, 0},
- {60, 61, 0, 0, 0},
- {61, 62, 0, 0, 0},
+ {43, 44, 0, 2, 0},
+ {44, 45, 0, 0, 0},
+ {45, 46, 0, 0, 0},
+ {46, 47, 0, 0, 0},
};
read_only global s8 beamformer_shader_names[] = {
@@ -274,6 +271,31 @@ read_only global i32 *beamformer_shader_header_vectors[] = {
0,
};
+read_only global s8 *beamformer_shader_bake_parameter_names[] = {
+ 0,
+ 0,
+ (s8 []){
+ s8_comp("AcquisitionCount"),
+ s8_comp("ChannelCount"),
+ s8_comp("DataKind"),
+ s8_comp("SampleCount"),
+ s8_comp("ShaderFlags"),
+ s8_comp("ShaderKind"),
+ },
+ 0,
+ 0,
+ 0,
+};
+
+read_only global i32 beamformer_shader_bake_parameter_name_counts[] = {
+ 0,
+ 0,
+ 6,
+ 0,
+ 0,
+ 0,
+};
+
function iz
beamformer_shader_match(i32 *match_vector, i32 first_index, i32 one_past_last_index, i32 vector_length)
{
@@ -317,10 +339,3 @@ beamformer_shader_demodulate_match(BeamformerDataKind a, BeamformerSamplingMode
return result;
}
-function iz
-beamformer_shader_das_match(BeamformerDataKind a, i32 flags)
-{
- iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 43, 59, 2);
- return result;
-}
-