ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: b6d2eb9dcb2c12841a2dc60374e11a7d97ac42fd
Parent: f815e8bb2229b323096765e1d00be10d0d0c57e2
Author: Randy Palamar
Date:   Wed, 24 Sep 2025 21:54:02 -0600

meta: move shader bake parameters into meta code

This will likely replace the permutation mechanism entirely; that
mechanism is not really useful if the shaders are being JIT compiled.

Diffstat:
Mbeamformer.c | 126++++++++++++++++++++++++++++++++++++++-----------------------------------------
Mbeamformer.h | 6+-----
Mbeamformer.meta | 17+++++++++++------
Mbuild.c | 180++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
Mgenerated/beamformer.meta.c | 71+++++++++++++++++++++++++++++++++++++++++++----------------------------
5 files changed, 276 insertions(+), 124 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -4,7 +4,6 @@ * - loop over all active blocks - loop over shader sets per block * - when match found reload it - * [ ]: refactor: move shader bake parameters into meta code * [ ]: measure performance of doing channel mapping in a separate shader * [ ]: BeamformWorkQueue -> BeamformerWorkQueue * [ ]: need to keep track of gpu memory in some way @@ -453,13 +452,19 @@ das_ubo_from_beamformer_parameters(BeamformerComputePlan *cp, BeamformerDASUBO * du->time_offset = bp->time_offset; du->f_number = bp->f_number; - cp->das_shader_kind = bp->das_shader_id; - cp->das_sample_count = bp->sample_count; - cp->das_channel_count = bp->channel_count; - cp->das_acquisition_count = bp->acquisition_count; + cp->das_bake.shader_kind = bp->das_shader_id; + cp->das_bake.sample_count = bp->sample_count; + cp->das_bake.channel_count = bp->channel_count; + cp->das_bake.acquisition_count = bp->acquisition_count; - cp->das_shader_flags = 0; - if (bp->coherency_weighting) cp->das_shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting; + cp->das_bake.shader_flags = 0; + if (bp->coherency_weighting) cp->das_bake.shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting; + else cp->das_bake.shader_flags |= BeamformerShaderDASFlags_Fast; + + if (bp->das_shader_id == BeamformerDASKind_UFORCES || bp->das_shader_id == BeamformerDASKind_UHERCULES) + cp->das_bake.shader_flags |= BeamformerShaderDASFlags_Sparse; + if (bp->interpolate) + cp->das_bake.shader_flags |= BeamformerShaderDASFlags_Interpolate; } function void @@ -539,19 +544,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) commit = 1; }break; case BeamformerShaderKind_DAS:{ - BeamformerDataKind das_data_kind = BeamformerDataKind_Float32; + cp->das_bake.data_kind = BeamformerDataKind_Float32; if (demodulate || run_cuda_hilbert) - das_data_kind = BeamformerDataKind_Float32Complex; - - i32 local_flags = 0; - if ((cp->das_shader_flags & 
BeamformerShaderDASFlags_CoherencyWeighting) == 0) - local_flags |= BeamformerShaderDASFlags_Fast; - if (cp->das_shader_kind == BeamformerDASKind_UFORCES || cp->das_shader_kind == BeamformerDASKind_UHERCULES) - local_flags |= BeamformerShaderDASFlags_Sparse; - if (pb->parameters.interpolate) - local_flags |= BeamformerShaderDASFlags_Interpolate; - - match = beamformer_shader_das_match(das_data_kind, local_flags); + cp->das_bake.data_kind = BeamformerDataKind_Float32Complex; commit = 1; }break; default:{ @@ -570,8 +565,8 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) cp->pipeline.data_kind = data_kind; u32 das_sample_stride = 1; - u32 das_transmit_stride = cp->das_sample_count; - u32 das_channel_stride = cp->das_acquisition_count * cp->das_sample_count; + u32 das_transmit_stride = cp->das_bake.sample_count; + u32 das_channel_stride = cp->das_bake.acquisition_count * cp->das_bake.sample_count; u32 decimation_rate = MAX(pb->parameters.decimation_rate, 1); if (demodulate) { @@ -580,14 +575,14 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) } u32 input_sample_stride = 1; - u32 input_transmit_stride = cp->das_sample_count; + u32 input_transmit_stride = cp->das_bake.sample_count; u32 input_channel_stride = pb->parameters.raw_data_dimensions[0]; BeamformerDecodeUBO *dp = &cp->decode_ubo_data; dp->decode_mode = pb->parameters.decode; - dp->transmit_count = cp->das_acquisition_count; + dp->transmit_count = cp->das_bake.acquisition_count; - dp->input_sample_stride = decode_first? input_sample_stride : cp->das_acquisition_count; + dp->input_sample_stride = decode_first? input_sample_stride : cp->das_bake.acquisition_count; dp->input_channel_stride = decode_first? input_channel_stride : das_channel_stride; dp->input_transmit_stride = decode_first? 
input_transmit_stride : 1; dp->output_sample_stride = das_sample_stride; @@ -598,9 +593,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) dp->output_transmit_stride *= decimation_rate; } - cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_sample_count / DECODE_LOCAL_SIZE_X); - cp->decode_dispatch.y = (u32)ceil_f32((f32)cp->das_channel_count / DECODE_LOCAL_SIZE_Y); - cp->decode_dispatch.z = (u32)ceil_f32((f32)cp->das_acquisition_count / DECODE_LOCAL_SIZE_Z); + cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / DECODE_LOCAL_SIZE_X); + cp->decode_dispatch.y = (u32)ceil_f32((f32)cp->das_bake.channel_count / DECODE_LOCAL_SIZE_Y); + cp->decode_dispatch.z = (u32)ceil_f32((f32)cp->das_bake.acquisition_count / DECODE_LOCAL_SIZE_Z); /* NOTE(rnp): decode 2 samples per dispatch when data is i16 */ if (decode_first && data_kind == BeamformerDataKind_Int16) @@ -621,8 +616,8 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) mp->sampling_frequency = bp->sampling_frequency / 2; mp->decimation_rate = decimation_rate; - bp->sampling_frequency /= 2 * (f32)mp->decimation_rate; - cp->das_sample_count /= 2 * mp->decimation_rate; + bp->sampling_frequency /= 2 * (f32)mp->decimation_rate; + cp->das_bake.sample_count /= 2 * mp->decimation_rate; if (decode_first) { mp->input_channel_stride = dp->output_channel_stride; @@ -642,16 +637,16 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) mp->output_sample_stride = dp->input_sample_stride; mp->output_transmit_stride = dp->input_transmit_stride; - cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_sample_count / DECODE_LOCAL_SIZE_X); + cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / DECODE_LOCAL_SIZE_X); } } /* TODO(rnp): filter may need a different dispatch layout */ - cp->demod_dispatch.x = (u32)ceil_f32((f32)cp->das_sample_count / FILTER_LOCAL_SIZE_X); - cp->demod_dispatch.y = 
(u32)ceil_f32((f32)cp->das_channel_count / FILTER_LOCAL_SIZE_Y); - cp->demod_dispatch.z = (u32)ceil_f32((f32)cp->das_acquisition_count / FILTER_LOCAL_SIZE_Z); + cp->demod_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / FILTER_LOCAL_SIZE_X); + cp->demod_dispatch.y = (u32)ceil_f32((f32)cp->das_bake.channel_count / FILTER_LOCAL_SIZE_Y); + cp->demod_dispatch.z = (u32)ceil_f32((f32)cp->das_bake.acquisition_count / FILTER_LOCAL_SIZE_Z); - cp->rf_size = cp->das_sample_count * cp->das_channel_count * cp->das_acquisition_count; + cp->rf_size = cp->das_bake.sample_count * cp->das_bake.channel_count * cp->das_bake.acquisition_count; if (demodulate || run_cuda_hilbert) cp->rf_size *= 8; else cp->rf_size *= 4; @@ -660,12 +655,12 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) flt->demodulation_frequency = bp->demodulation_frequency; flt->sampling_frequency = bp->sampling_frequency; flt->decimation_rate = 1; - flt->output_channel_stride = cp->das_sample_count * cp->das_acquisition_count; + flt->output_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count; flt->output_sample_stride = 1; - flt->output_transmit_stride = cp->das_sample_count; - flt->input_channel_stride = cp->das_sample_count * cp->das_acquisition_count; + flt->output_transmit_stride = cp->das_bake.sample_count; + flt->input_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count; flt->input_sample_stride = 1; - flt->input_transmit_stride = cp->das_sample_count; + flt->input_transmit_stride = cp->das_bake.sample_count; } function void @@ -749,11 +744,11 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl Stream shader_stream = arena_stream(arena); stream_push_shader_header(&shader_stream, base_shader, compute_headers[base_shader]); - stream_append_s8(&shader_stream, beamformer_shader_local_header_strings[reloadable_index]); - i32 *header_indices = beamformer_shader_header_vectors[sd - 
beamformer_shader_descriptors]; for (i32 index = 0; index < sd->header_vector_length; index++) - stream_append_s8s(&shader_stream, beamformer_shader_global_header_strings[header_indices[index]], s8("\n")); + stream_append_s8(&shader_stream, beamformer_shader_global_header_strings[header_indices[index]]); + + stream_append_s8(&shader_stream, beamformer_shader_local_header_strings[reloadable_index]); i32 *match_vector = beamformer_shader_match_vectors[cp->shader_matches[shader_slot]]; for (i32 index = 0; index < sd->match_vector_length; index++) { @@ -765,25 +760,26 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl if (sd->has_local_flags) { stream_append_s8(&shader_stream, s8("#define ShaderFlags (0x")); i32 flags = match_vector[sd->match_vector_length]; - if (shader == BeamformerShaderKind_DAS) flags |= cp->das_shader_flags; stream_append_hex_u64(&shader_stream, (u64)flags); stream_append_s8(&shader_stream, s8(")\n")); } - /* TODO(rnp): generate this */ - switch (base_shader) { - case BeamformerShaderKind_DAS:{ - stream_append_s8(&shader_stream, s8("#define ShaderKind ")); - stream_append_u64(&shader_stream, cp->das_shader_kind); - stream_append_s8(&shader_stream, s8("\n#define SampleCount ")); - stream_append_u64(&shader_stream, cp->das_sample_count); - stream_append_s8(&shader_stream, s8("\n#define ChannelCount ")); - stream_append_u64(&shader_stream, cp->das_channel_count); - stream_append_s8(&shader_stream, s8("\n#define AcquisitionCount ")); - stream_append_u64(&shader_stream, cp->das_acquisition_count); - stream_append_s8(&shader_stream, s8("\n")); - }break; - default:{}break; + if (beamformer_shader_bake_parameter_name_counts[reloadable_index]) { + i32 count = beamformer_shader_bake_parameter_name_counts[reloadable_index]; + u32 *parameters = 0; + /* TODO(rnp): generate this */ + switch (base_shader) { + case BeamformerShaderKind_DAS:{ parameters = cp->das_bake.E; }break; + default:{}break; + } + + assert(parameters); + s8 
*names = beamformer_shader_bake_parameter_names[reloadable_index]; + for (i32 index = 0; index < count; index++) { + stream_append_s8s(&shader_stream, s8("#define "), names[index], s8(" (0x")); + stream_append_hex_u64(&shader_stream, parameters[index]); + stream_append_s8(&shader_stream, s8(")\n")); + } } stream_append_s8(&shader_stream, s8("\n#line 1\n")); @@ -835,8 +831,8 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, if (ctx->compute_context.ping_pong_ssbo_size < decoded_data_size) alloc_shader_storage(ctx, decoded_data_size, arena); - if (cp->hadamard_order != (i32)cp->das_acquisition_count) - update_hadamard_texture(cp, (i32)cp->das_acquisition_count, arena); + if (cp->hadamard_order != (i32)cp->das_bake.acquisition_count) + update_hadamard_texture(cp, (i32)cp->das_bake.acquisition_count, arena); cp->min_coordinate = v3_from_f32_array(pb->parameters.output_min_coordinate); cp->max_coordinate = v3_from_f32_array(pb->parameters.output_max_coordinate); @@ -969,7 +965,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame case BeamformerShaderKind_DAS:{ local_persist u32 das_cycle_t = 0; - i32 local_flags = match_vector[shader_descriptor->match_vector_length]; + u32 local_flags = cp->das_bake.shader_flags; b32 fast = (local_flags & BeamformerShaderDASFlags_Fast) != 0; b32 sparse = (local_flags & BeamformerShaderDASFlags_Sparse) != 0; @@ -994,14 +990,14 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame if (fast) { i32 loop_end; - if (cp->das_shader_kind == BeamformerDASKind_RCA_VLS || - cp->das_shader_kind == BeamformerDASKind_RCA_TPW) + if (cp->das_bake.shader_kind == BeamformerDASKind_RCA_VLS || + cp->das_bake.shader_kind == BeamformerDASKind_RCA_TPW) { /* NOTE(rnp): to avoid repeatedly sampling the whole focal vectors * texture we loop over transmits for VLS/TPW */ - loop_end = (i32)cp->das_acquisition_count; + loop_end = (i32)cp->das_bake.acquisition_count; } 
else { - loop_end = (i32)cp->das_channel_count; + loop_end = (i32)cp->das_bake.channel_count; } f32 percent_per_step = 1.0f / (f32)loop_end; cc->processing_progress = -percent_per_step; @@ -1217,8 +1213,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c frame->min_coordinate = cp->min_coordinate; frame->max_coordinate = cp->max_coordinate; - frame->das_kind = cp->das_shader_kind; - frame->compound_count = cp->das_acquisition_count; + frame->das_kind = cp->das_bake.shader_kind; + frame->compound_count = cp->das_bake.acquisition_count; BeamformerComputeContext *cc = &ctx->compute_context; BeamformerComputePipeline *pipeline = &cp->pipeline; diff --git a/beamformer.h b/beamformer.h @@ -224,11 +224,7 @@ struct BeamformerComputePlan { BEAMFORMER_COMPUTE_UBO_LIST #undef X - u32 das_shader_kind; - u32 das_sample_count; - u32 das_channel_count; - u32 das_acquisition_count; - i32 das_shader_flags; + BeamformerShaderDASBakeParameters das_bake; BeamformerComputePlan *next; }; diff --git a/beamformer.meta b/beamformer.meta @@ -38,14 +38,19 @@ @Shader(das.glsl) DAS { - @Permute(DataKind [Float32 Float32Complex]) - { - @PermuteFlags([Fast Sparse Interpolate]) - } - + @Enumeration(DataKind) @Enumeration(RCAOrientation) + @Flags([Fast Sparse Interpolate CoherencyWeighting]) - @Flags([CoherencyWeighting]) + @Bake + { + @BakeVariable(AcquisitionCount acquisition_count) + @BakeVariable(ChannelCount channel_count ) + @BakeVariable(DataKind data_kind ) + @BakeVariable(SampleCount sample_count ) + @BakeVariable(ShaderFlags shader_flags ) + @BakeVariable(ShaderKind shader_kind ) + } } @Shader(min_max.glsl) MinMax diff --git a/build.c b/build.c @@ -2,6 +2,7 @@ /* NOTE: inspired by nob: https://github.com/tsoding/nob.h */ /* TODO(rnp): + * [ ]: refactor: helper for appending expanded shader flags * [ ]: refactor: "base" shaders should only be reloadable shaders * - internally when a shader with no file is encountered it should * not get pushed as a "base" 
shader. @@ -819,6 +820,8 @@ meta_end_and_write_matlab(MetaprogramContext *m, char *path) #define META_ENTRY_KIND_LIST \ X(Invalid) \ + X(Bake) \ + X(BakeVariable) \ X(BeginScope) \ X(EndScope) \ X(Enumeration) \ @@ -935,6 +938,13 @@ global jmp_buf compiler_jmp_buf; meta_error(); \ } while(0) +#define meta_entry_pair_error(e, prefix, base_kind) \ + meta_entry_error(e, prefix"@%s() in @%s()\n", \ + meta_entry_kind_strings[(e)->kind], \ + meta_entry_kind_strings[(base_kind)]) + +#define meta_entry_nesting_error(e, base_kind) meta_entry_pair_error(e, "invalid nesting: ", base_kind) + #define meta_entry_error_location(e, loc, ...) do { \ meta_compiler_error_message((loc), __VA_ARGS__); \ meta_entry_print((e), 1, (i32)(loc).column); \ @@ -1160,6 +1170,29 @@ meta_parser_arguments(MetaParser *p, MetaEntry *e, Arena *arena) } } +function iz +meta_entry_extract_scope(MetaEntry *base, iz entry_count) +{ + assert(base->kind != MetaEntryKind_BeginScope || base->kind != MetaEntryKind_EndScope); + assert(entry_count > 0); + + MetaEntry *e = base + 1; + iz result, sub_scope = 0; + for (result = 1; result < entry_count; result++, e++) { + switch (e->kind) { + case MetaEntryKind_BeginScope:{ sub_scope++; }break; + case MetaEntryKind_EndScope:{ sub_scope--; }break; + default:{}break; + } + if (sub_scope == 0) break; + } + + if (sub_scope != 0) + meta_entry_error(base, "unclosed scope for entry\n"); + + return result; +} + function MetaEntryStack meta_entry_stack_from_file(Arena *arena, Arena scratch, char *file) { @@ -1245,6 +1278,14 @@ typedef struct { } s8_list_table; typedef struct { + s8 *names_upper; + s8 *names_lower; + u32 entry_count; + u32 shader_id; +} MetaShaderBakeParameters; +DA_STRUCT(MetaShaderBakeParameters, MetaShaderBakeParameters); + +typedef struct { iz kind; iz variation; } MetaEnumeration; @@ -1266,8 +1307,11 @@ typedef struct { MetaShaderPermutationList permutations; MetaIDList global_flag_ids; MetaIDList global_enumeration_ids; + MetaShaderBakeParameters 
*bake_parameters; u32 base_name_id; u32 flag_list_id; + /* TODO(rnp): temporary: remove when all flags are baked */ + b32 flags_permuted; } MetaShader; DA_STRUCT(MetaShader, MetaShader); @@ -1282,7 +1326,6 @@ typedef struct { i32 first_match_vector_index; i32 one_past_last_match_vector_index; i32 sub_field_count; - b32 has_local_flags; } MetaShaderDescriptor; typedef struct { @@ -1299,15 +1342,15 @@ typedef struct { s8_list_table flags_for_shader; - MetaShaderGroupList shader_groups; - MetaShaderList shaders; - MetaBaseShaderList base_shaders; - s8_list shader_names; + MetaShaderBakeParametersList shader_bake_parameters; + MetaShaderGroupList shader_groups; + MetaShaderList shaders; + MetaBaseShaderList base_shaders; + s8_list shader_names; MetaShaderDescriptor *shader_descriptors; } MetaContext; - function u32 metagen_pack_permutation(MetaContext *ctx, MetaEnumeration e) { @@ -1393,6 +1436,43 @@ meta_intern_id(MetaContext *ctx, MetaIDList *v, u32 id) } function iz +meta_pack_shader_bake_parameters(MetaContext *ctx, MetaEntry *e, iz entry_count, u32 shader_id, u32 *table_id) +{ + assert(e->kind == MetaEntryKind_Bake); + iz result = meta_entry_extract_scope(e, entry_count); + + MetaShaderBakeParameters *bp = da_push(ctx->arena, &ctx->shader_bake_parameters); + bp->shader_id = shader_id; + if (table_id) *table_id = (u32)da_index(bp, &ctx->shader_bake_parameters); + + if (e->argument_count) meta_entry_argument_expected(e); + + if (result > 1) { + MetaEntry *last = e + result; + assert(e[1].kind == MetaEntryKind_BeginScope); + assert(last->kind == MetaEntryKind_EndScope); + + for (MetaEntry *row = e + 2; row != last; row++) { + if (row->kind != MetaEntryKind_BakeVariable) + meta_entry_nesting_error(row, MetaEntryKind_Bake); + meta_entry_argument_expected(row, s8("name"), s8("name_lower")); + bp->entry_count++; + } + + bp->names_upper = push_array(ctx->arena, s8, bp->entry_count); + bp->names_lower = push_array(ctx->arena, s8, bp->entry_count); + + u32 row_index = 0; + 
for (MetaEntry *row = e + 2; row != last; row++, row_index++) { + bp->names_upper[row_index] = row->arguments[0].string; + bp->names_lower[row_index] = row->arguments[1].string; + } + } + + return result; +} + +function iz meta_enumeration_id(MetaContext *ctx, s8 kind) { iz result = meta_intern_string(ctx, &ctx->enumeration_kinds, kind); @@ -1459,6 +1539,7 @@ meta_pack_shader_permutation(MetaContext *ctx, MetaShaderPermutation *sp, MetaSh u32 cursor = f->cursor.current; switch (e->kind) { case MetaEntryKind_PermuteFlags:{ + base_shader->flags_permuted |= 1; if (f->permutation_id == U32_MAX) { u32 test = cursor, packed = 0; for EachBit(test, flag) { @@ -1486,6 +1567,7 @@ meta_pack_shader_permutation(MetaContext *ctx, MetaShaderPermutation *sp, MetaSh MetaEntryArgument *a = last->arguments; switch (last->kind) { case MetaEntryKind_PermuteFlags:{ + base_shader->flags_permuted |= 1; u32 packed = 0, test = frame_cursor; for EachBit(test, flag) { u32 flag_index = meta_commit_shader_flag(ctx, base_shader->flag_list_id, a->strings[flag], last); @@ -1671,13 +1753,21 @@ meta_pack_shader(MetaContext *ctx, MetaShaderGroup *sg, Arena scratch, MetaEntry for (u32 index = 0; index < flags.count; index++) meta_commit_shader_flag(ctx, s->flag_list_id, flags.strings[index], e); }break; + case MetaEntryKind_Bake:{ + if (s->bake_parameters) { + meta_entry_error(e, "invalid @%s in @%s: only one @%s allowed per @%s\n", + meta_entry_kind_strings[e->kind], meta_entry_kind_strings[MetaEntryKind_Shader], + meta_entry_kind_strings[e->kind], meta_entry_kind_strings[MetaEntryKind_Shader]); + } + u32 table_id; + result += meta_pack_shader_bake_parameters(ctx, e, entry_count - result, (u32)da_index(s, &ctx->shaders), &table_id); + s->bake_parameters = ctx->shader_bake_parameters.data + table_id; + }break; default: error: { - meta_entry_error(e, "invalid nested @%s() in @%s()\n", - meta_entry_kind_strings[e->kind], - meta_entry_kind_strings[MetaEntryKind_Shader]); + meta_entry_nesting_error(e, 
MetaEntryKind_Shader); }break; } if (stack.count == 0) @@ -1761,12 +1851,12 @@ metagen_push_c_flag_enum(MetaprogramContext *m, Arena scratch, s8 kind, s8 *ids, function void metagen_push_shader_derivative_vectors(MetaContext *ctx, MetaprogramContext *m, MetaShader *s, - i32 sub_field_count, b32 has_local_flags) + i32 sub_field_count) { meta_push_line(m, s8("// "), ctx->shader_names.data[s->base_name_id]); for (iz perm = 0; perm < s->permutations.count; perm++) { MetaShaderPermutation *p = s->permutations.data + perm; - if (!has_local_flags && sub_field_count == 0) { + if (!s->flags_permuted && sub_field_count == 0) { meta_push_line(m, s8("0,")); } else { meta_begin_line(m, s8("(i32 []){")); @@ -1780,7 +1870,7 @@ metagen_push_shader_derivative_vectors(MetaContext *ctx, MetaprogramContext *m, for (i32 id = p->global_flags_count; id < sub_field_count; id++) meta_push(m, s8(", -1")); - if (has_local_flags) { + if (s->flags_permuted) { meta_push(m, s8(", 0x")); meta_push_u64_hex(m, p->local_flags); } @@ -1818,7 +1908,7 @@ meta_push_shader_descriptors_table(MetaprogramContext *m, MetaContext *ctx) stream_append_byte(&sb, ','); columns[3][shader] = arena_stream_commit_and_reset(&m->scratch, &sb); - columns[4][shader] = sd->has_local_flags ? s8("1") : s8 ("0"); + columns[4][shader] = s->flags_permuted ? 
s8("1") : s8 ("0"); } meta_begin_scope(m, s8("read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = {")); @@ -1949,7 +2039,7 @@ meta_push_shader_match_helper(MetaprogramContext *m, MetaContext *ctx, MetaShade meta_push(m, s8("Beamformer"), ctx->enumeration_kinds.data[index], s8(" ")); stream_append_byte(&m->stream, (u8)((iz)'a' + flag)); } - if (sd->has_local_flags) { + if (s->flags_permuted) { if (s->global_flag_ids.count) meta_push(m, s8(", ")); meta_push(m, s8("i32 flags")); } @@ -1961,7 +2051,7 @@ meta_push_shader_match_helper(MetaprogramContext *m, MetaContext *ctx, MetaShade if (flag != 0) meta_push(m, s8(", (i32)")); stream_append_byte(&m->stream, (u8)((iz)'a' + flag)); } - if (sd->has_local_flags) { + if (s->flags_permuted) { if (s->global_flag_ids.count) meta_push(m, s8(", ")); meta_push(m, s8("flags")); } @@ -1970,7 +2060,7 @@ meta_push_shader_match_helper(MetaprogramContext *m, MetaContext *ctx, MetaShade meta_push(m, s8(", ")); meta_push_u64(m, (u64)sd->one_past_last_match_vector_index); meta_push(m, s8(", ")); - meta_push_u64(m, (u64)sd->sub_field_count + sd->has_local_flags); + meta_push_u64(m, (u64)sd->sub_field_count + s->flags_permuted); meta_end_line(m, s8(");")); meta_push_line(m, s8("return result;")); meta_end_scope(m, s8("}\n")); @@ -2065,6 +2155,24 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) metagen_push_c_struct(m, name, types, countof(types), names, countof(names)); } + for (u32 bake = 0; bake < ctx->shader_bake_parameters.count; bake++) { + Arena tmp = m->scratch; + MetaShaderBakeParameters *b = ctx->shader_bake_parameters.data + bake; + MetaShader *s = ctx->shaders.data + b->shader_id; + s8 name = push_s8_from_parts(&m->scratch, s8(""), s8("BeamformerShader"), + ctx->shader_names.data[s->base_name_id], s8("BakeParameters")); + meta_begin_scope(m, s8("typedef union {")); + meta_begin_scope(m, s8("struct {")); + for (u32 entry = 0; entry < b->entry_count; entry++) + meta_push_line(m, s8("u32 "), 
b->names_lower[entry], s8(";")); + meta_end_scope(m, s8("};")); + meta_begin_line(m, s8("u32 E[")); + meta_push_u64(m, b->entry_count); + meta_end_line(m, s8("];")); + meta_end_scope(m, s8("} "), name, s8(";\n")); + m->scratch = tmp; + } + /////////////////////////////////////// // NOTE(rnp): shader descriptor tables i32 match_vectors_count = 0; @@ -2072,7 +2180,7 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) for (iz shader = 0; shader < ctx->shaders.count; shader++) { MetaShader *s = ctx->shaders.data + shader; MetaShaderDescriptor *sd = ctx->shader_descriptors + shader; - metagen_push_shader_derivative_vectors(ctx, m, s, sd->sub_field_count, sd->has_local_flags); + metagen_push_shader_derivative_vectors(ctx, m, s, sd->sub_field_count); match_vectors_count += (i32)s->permutations.count; } meta_end_scope(m, s8("};")); @@ -2095,7 +2203,7 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) for (iz shader = 0; shader < ctx->shaders.count; shader++) { MetaShader *s = ctx->shaders.data + shader; - if (s->global_flag_ids.count) { + if (s->global_flag_ids.count || s->global_enumeration_ids.count) { meta_begin_line(m, s8("(i32 []){")); for (iz id = 0; id < s->global_flag_ids.count; id++) { if (id != 0) meta_push(m, s8(", ")); @@ -2112,6 +2220,39 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) } meta_end_scope(m, s8("};\n")); + meta_begin_scope(m, s8("read_only global s8 *beamformer_shader_bake_parameter_names[] = {")); + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + MetaBaseShader *bs = ctx->base_shaders.data + shader; + MetaShader *s = bs->shader; + if (bs->file.len) { + if (s->bake_parameters) { + meta_begin_scope(m, s8("(s8 []){")); + for (u32 index = 0; index < s->bake_parameters->entry_count; index++) + meta_push_line(m, s8("s8_comp(\""), s->bake_parameters->names_upper[index], s8("\"),")); + meta_end_scope(m, s8("},")); + } else { + meta_push_line(m, s8("0,")); + } + } + } + meta_end_scope(m, s8("};\n")); + + meta_begin_scope(m, 
s8("read_only global i32 beamformer_shader_bake_parameter_name_counts[] = {")); + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + MetaBaseShader *bs = ctx->base_shaders.data + shader; + MetaShader *s = bs->shader; + if (bs->file.len) { + if (s->bake_parameters) { + meta_indent(m); + meta_push_u64(m, s->bake_parameters->entry_count); + meta_end_line(m, s8(",")); + } else { + meta_push_line(m, s8("0,")); + } + } + } + meta_end_scope(m, s8("};\n")); + ////////////////////////////////////// // NOTE(rnp): shader matching helpers meta_push_line(m, s8("function iz")); @@ -2139,7 +2280,7 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) for (iz shader = 0; shader < ctx->shaders.count; shader++) { MetaShader *s = ctx->shaders.data + shader; MetaShaderDescriptor *sd = ctx->shader_descriptors + shader; - if (sd->sub_field_count || sd->has_local_flags) + if (sd->sub_field_count || s->flags_permuted) meta_push_shader_match_helper(m, ctx, s, sd); } @@ -2414,7 +2555,6 @@ metagen_load_context(Arena *arena) MetaShader *s = ctx->shaders.data + shader; MetaShaderDescriptor *sd = ctx->shader_descriptors + shader; - sd->has_local_flags = ctx->flags_for_shader.data[s->flag_list_id].count > 0; sd->sub_field_count = (i32)s->global_flag_ids.count; sd->first_match_vector_index = match_vectors_count; match_vectors_count += (i32)s->permutations.count; diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -73,6 +73,18 @@ typedef struct { b32 has_local_flags; } BeamformerShaderDescriptor; +typedef union { + struct { + u32 acquisition_count; + u32 channel_count; + u32 data_kind; + u32 sample_count; + u32 shader_flags; + u32 shader_kind; + }; + u32 E[6]; +} BeamformerShaderDASBakeParameters; + read_only global i32 *beamformer_shader_match_vectors[] = { // CudaDecode 0, @@ -123,22 +135,7 @@ read_only global i32 *beamformer_shader_match_vectors[] = { (i32 []){BeamformerDataKind_Float32, -1, 0x06}, (i32 []){BeamformerDataKind_Float32, -1, 0x07}, // DAS - 
(i32 []){BeamformerDataKind_Float32, 0x00}, - (i32 []){BeamformerDataKind_Float32, 0x01}, - (i32 []){BeamformerDataKind_Float32, 0x02}, - (i32 []){BeamformerDataKind_Float32, 0x03}, - (i32 []){BeamformerDataKind_Float32, 0x04}, - (i32 []){BeamformerDataKind_Float32, 0x05}, - (i32 []){BeamformerDataKind_Float32, 0x06}, - (i32 []){BeamformerDataKind_Float32, 0x07}, - (i32 []){BeamformerDataKind_Float32Complex, 0x00}, - (i32 []){BeamformerDataKind_Float32Complex, 0x01}, - (i32 []){BeamformerDataKind_Float32Complex, 0x02}, - (i32 []){BeamformerDataKind_Float32Complex, 0x03}, - (i32 []){BeamformerDataKind_Float32Complex, 0x04}, - (i32 []){BeamformerDataKind_Float32Complex, 0x05}, - (i32 []){BeamformerDataKind_Float32Complex, 0x06}, - (i32 []){BeamformerDataKind_Float32Complex, 0x07}, + 0, // MinMax 0, // Sum @@ -146,7 +143,7 @@ read_only global i32 *beamformer_shader_match_vectors[] = { // Render3D 0, }; -#define beamformer_match_vectors_count (62) +#define beamformer_match_vectors_count (47) read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = { {0, 1, 0, 0, 0}, @@ -154,10 +151,10 @@ read_only global BeamformerShaderDescriptor beamformer_shader_descriptors[] = { {2, 7, 1, 2, 1}, {7, 19, 1, 1, 1}, {19, 43, 2, 2, 1}, - {43, 59, 1, 2, 1}, - {59, 60, 0, 0, 0}, - {60, 61, 0, 0, 0}, - {61, 62, 0, 0, 0}, + {43, 44, 0, 2, 0}, + {44, 45, 0, 0, 0}, + {45, 46, 0, 0, 0}, + {46, 47, 0, 0, 0}, }; read_only global s8 beamformer_shader_names[] = { @@ -274,6 +271,31 @@ read_only global i32 *beamformer_shader_header_vectors[] = { 0, }; +read_only global s8 *beamformer_shader_bake_parameter_names[] = { + 0, + 0, + (s8 []){ + s8_comp("AcquisitionCount"), + s8_comp("ChannelCount"), + s8_comp("DataKind"), + s8_comp("SampleCount"), + s8_comp("ShaderFlags"), + s8_comp("ShaderKind"), + }, + 0, + 0, + 0, +}; + +read_only global i32 beamformer_shader_bake_parameter_name_counts[] = { + 0, + 0, + 6, + 0, + 0, + 0, +}; + function iz beamformer_shader_match(i32 
*match_vector, i32 first_index, i32 one_past_last_index, i32 vector_length) { @@ -317,10 +339,3 @@ beamformer_shader_demodulate_match(BeamformerDataKind a, BeamformerSamplingMode return result; } -function iz -beamformer_shader_das_match(BeamformerDataKind a, i32 flags) -{ - iz result = beamformer_shader_match((i32 []){(i32)a, flags}, 43, 59, 2); - return result; -} -