ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 1d4edb253aed00cf8026c63a4433cd5252939887
Parent: c1412111540f4dd1859b19f21428446e9f78cfb2
Author: Randy Palamar
Date:   Sat, 11 Oct 2025 21:27:02 -0600

core: refactor: move shader bake parameters into a union

Diffstat:
M beamformer.c | 382 +++++++++++++++++++++++++++++++++++++------------------------------------------
M beamformer.h | 14 +++++---------
M beamformer.meta | 3 ---
M build.c | 40 ++++++++++++++++++++++++++++++----------
M generated/beamformer.meta.c | 107 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
5 files changed, 263 insertions(+), 283 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -438,50 +438,9 @@ das_voxel_transform_matrix(BeamformerParameters *bp) return result; } -function u32 -das_ubo_from_beamformer_parameters(BeamformerComputePlan *cp, BeamformerDASUBO *du, BeamformerParameters *bp) -{ - du->voxel_transform = das_voxel_transform_matrix(bp); - mem_copy(du->xdc_transform.E, bp->xdc_transform, sizeof(du->xdc_transform)); - mem_copy(du->xdc_element_pitch.E, bp->xdc_element_pitch, sizeof(du->xdc_element_pitch)); - cp->das_bake.sampling_frequency = bp->sampling_frequency; - cp->das_bake.demodulation_frequency = bp->demodulation_frequency; - cp->das_bake.speed_of_sound = bp->speed_of_sound; - cp->das_bake.time_offset = bp->time_offset; - cp->das_bake.f_number = bp->f_number; - cp->das_bake.acquisition_kind = bp->das_shader_id; - cp->das_bake.sample_count = bp->sample_count; - cp->das_bake.channel_count = bp->channel_count; - cp->das_bake.acquisition_count = bp->acquisition_count; - cp->das_bake.interpolation_mode = bp->interpolation_mode; - cp->das_bake.transmit_angle = bp->focal_vector[0]; - cp->das_bake.focus_depth = bp->focal_vector[1]; - cp->das_bake.transmit_receive_orientation = bp->transmit_receive_orientation; - - u32 result = 0; - if (bp->coherency_weighting) result |= BeamformerShaderDASFlags_CoherencyWeighting; - else result |= BeamformerShaderDASFlags_Fast; - - if (bp->single_focus) result |= BeamformerShaderDASFlags_SingleFocus; - if (bp->single_orientation) result |= BeamformerShaderDASFlags_SingleOrientation; - - if (bp->das_shader_id == BeamformerAcquisitionKind_UFORCES || bp->das_shader_id == BeamformerAcquisitionKind_UHERCULES) - result |= BeamformerShaderDASFlags_Sparse; - - if (bp->das_shader_id == BeamformerAcquisitionKind_HERO_PA) - result |= BeamformerShaderDASFlags_ReceiveOnly; - - return result; -} - function void plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) { - BeamformerDASUBO *bp = &cp->das_ubo_data; - - u32 das_flags = 
das_ubo_from_beamformer_parameters(cp, bp, &pb->parameters); - - b32 decode_first = pb->pipeline.shaders[0] == BeamformerShaderKind_Decode; b32 run_cuda_hilbert = 0; b32 demodulate = 0; @@ -497,74 +456,187 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) if (demodulate || run_cuda_hilbert) cp->iq_pipeline = 1; + f32 sampling_frequency = pb->parameters.sampling_frequency; + u32 decimation_rate = MAX(pb->parameters.decimation_rate, 1); + u32 sample_count = pb->parameters.sample_count; + if (demodulate) { + sample_count /= (2 * decimation_rate); + sampling_frequency /= 2 * (f32)decimation_rate; + } + + u32 das_sample_stride = 1; + u32 das_transmit_stride = sample_count; + u32 das_channel_stride = sample_count * pb->parameters.acquisition_count; + + f32 time_offset = pb->parameters.time_offset; + BeamformerDataKind data_kind = pb->pipeline.data_kind; cp->pipeline.shader_count = 0; for (u32 i = 0; i < pb->pipeline.shader_count; i++) { BeamformerShaderParameters *sp = pb->pipeline.parameters + i; + u32 slot = cp->pipeline.shader_count; u32 shader = pb->pipeline.shaders[i]; b32 commit = 0; + BeamformerShaderBakeParameters *lp = cp->shader_bake_parameters + slot - 1; + BeamformerShaderBakeParameters *bp = cp->shader_bake_parameters + slot; + zero_struct(bp); + switch (shader) { case BeamformerShaderKind_CudaHilbert:{ commit = run_cuda_hilbert; }break; case BeamformerShaderKind_Decode:{ /* TODO(rnp): rework decode first and demodulate after */ - BeamformerShaderDecodeBakeParameters *b = &cp->decode_bake; - b->data_kind = data_kind; - if (!decode_first) { + b32 first = slot == 0; + + bp->data_kind = data_kind; + if (!first) { if (data_kind == BeamformerDataKind_Int16) { - b->data_kind = BeamformerDataKind_Int16Complex; + bp->data_kind = BeamformerDataKind_Int16Complex; } else { - b->data_kind = BeamformerDataKind_Float32Complex; + bp->data_kind = BeamformerDataKind_Float32Complex; } } - u32 *flags = cp->shader_flags + 
cp->pipeline.shader_count; - *flags = 0; - if (run_cuda_hilbert) *flags |= BeamformerShaderDecodeFlags_DilateOutput; + if (run_cuda_hilbert) bp->flags |= BeamformerShaderDecodeFlags_DilateOutput; - commit = 1; - }break; - case BeamformerShaderKind_Demodulate:{ - BeamformerShaderFilterBakeParameters *b = &cp->demodulate_bake; - BeamformerFilter *f = cp->filters + sp->filter_slot; + BeamformerShaderKind *last_shader = cp->pipeline.shaders + slot - 1; + assert(first || ((*last_shader == BeamformerShaderKind_Demodulate || + *last_shader == BeamformerShaderKind_Filter))); + + bp->Decode.decode_mode = pb->parameters.decode_mode; + bp->Decode.transmit_count = pb->parameters.acquisition_count; + + bp->Decode.input_sample_stride = first? 1 : lp->Filter.output_sample_stride; + bp->Decode.input_channel_stride = first? pb->parameters.raw_data_dimensions[0] : lp->Filter.output_channel_stride; + bp->Decode.input_transmit_stride = first? pb->parameters.sample_count : 1; - cp->das_bake.time_offset += f->time_delay; + bp->Decode.output_sample_stride = das_sample_stride; + bp->Decode.output_channel_stride = das_channel_stride; + bp->Decode.output_transmit_stride = das_transmit_stride; + if (first) { + bp->Decode.output_channel_stride *= decimation_rate; + bp->Decode.output_transmit_stride *= decimation_rate; + } + + cp->shader_dispatch[slot].x = (u32)ceil_f32((f32)sample_count / DECODE_LOCAL_SIZE_X); + cp->shader_dispatch[slot].y = (u32)ceil_f32((f32)pb->parameters.channel_count / DECODE_LOCAL_SIZE_Y); + cp->shader_dispatch[slot].z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / DECODE_LOCAL_SIZE_Z); - b->filter_length = (u32)f->length; - b->sampling_mode = pb->parameters.sampling_mode; - u32 *flags = cp->shader_flags + cp->pipeline.shader_count; - *flags = BeamformerShaderFilterFlags_Demodulate; - if (f->parameters.complex) *flags |= BeamformerShaderFilterFlags_ComplexFilter; - if (!decode_first) *flags |= BeamformerShaderFilterFlags_MapChannels; + if (first) 
cp->shader_dispatch[slot].x *= decimation_rate; - b->data_kind = data_kind; - if (decode_first) - b->data_kind = BeamformerDataKind_Float32; + /* NOTE(rnp): decode 2 samples per dispatch when data is i16 */ + if (first && data_kind == BeamformerDataKind_Int16) + cp->shader_dispatch[slot].x = (u32)ceil_f32((f32)cp->shader_dispatch[slot].x / 2); commit = 1; }break; - case BeamformerShaderKind_Filter:{ - BeamformerShaderFilterBakeParameters *b = &cp->filter_bake; + case BeamformerShaderKind_Demodulate: + case BeamformerShaderKind_Filter: + { + b32 first = slot == 0; + b32 demod = shader == BeamformerShaderKind_Demodulate; BeamformerFilter *f = cp->filters + sp->filter_slot; - cp->das_bake.time_offset += f->time_delay; + time_offset += f->time_delay; + + bp->Filter.filter_length = (u32)f->length; + bp->Filter.sampling_mode = pb->parameters.sampling_mode; + if (demod) bp->flags |= BeamformerShaderFilterFlags_Demodulate; + if (f->parameters.complex) bp->flags |= BeamformerShaderFilterFlags_ComplexFilter; + if (first) bp->flags |= BeamformerShaderFilterFlags_MapChannels; + + bp->data_kind = data_kind; + if (!first) bp->data_kind = BeamformerDataKind_Float32; + + /* NOTE(rnp): when we are demodulating we pretend that the sampler was alternating + * between sampling the I portion and the Q portion of an IQ signal. Therefore there + * is an implicit decimation factor of 2 which must always be included. All code here + * assumes that the signal was sampled in such a way that supports this operation. 
+ * To recover IQ[n] from the sampled data (RF[n]) we do the following: + * I[n] = RF[n] + * Q[n] = RF[n + 1] + * IQ[n] = I[n] - j*Q[n] + */ + if (demod) { + bp->Filter.demodulation_frequency = pb->parameters.demodulation_frequency; + bp->Filter.sampling_frequency = pb->parameters.sampling_frequency / 2; + bp->Filter.decimation_rate = decimation_rate; + + if (first) { + bp->Filter.input_channel_stride = pb->parameters.raw_data_dimensions[0] / 2; + bp->Filter.input_sample_stride = 1; + bp->Filter.input_transmit_stride = pb->parameters.sample_count / 2; + + /* NOTE(rnp): output optimized layout for decoding */ + bp->Filter.output_channel_stride = das_channel_stride; + bp->Filter.output_sample_stride = pb->parameters.acquisition_count; + bp->Filter.output_transmit_stride = 1; + } else { + assert(cp->pipeline.shaders[slot - 1] == BeamformerShaderKind_Decode); + bp->Filter.input_channel_stride = lp->Decode.output_channel_stride; + bp->Filter.input_sample_stride = lp->Decode.output_sample_stride; + bp->Filter.input_transmit_stride = lp->Decode.output_transmit_stride; + + bp->Filter.output_channel_stride = das_channel_stride; + bp->Filter.output_sample_stride = das_sample_stride; + bp->Filter.output_transmit_stride = das_transmit_stride; + } + } else { + bp->Filter.decimation_rate = 1; + bp->Filter.output_channel_stride = sample_count * pb->parameters.acquisition_count; + bp->Filter.output_sample_stride = 1; + bp->Filter.output_transmit_stride = sample_count; + bp->Filter.input_channel_stride = sample_count * pb->parameters.acquisition_count; + bp->Filter.input_sample_stride = 1; + bp->Filter.input_transmit_stride = sample_count; + } - b->filter_length = (u32)f->length; - u32 *flags = cp->shader_flags + cp->pipeline.shader_count; - *flags = 0; - if (f->parameters.complex) *flags |= BeamformerShaderFilterFlags_ComplexFilter; + /* TODO(rnp): filter may need a different dispatch layout */ + cp->shader_dispatch[slot].x = (u32)ceil_f32((f32)sample_count / FILTER_LOCAL_SIZE_X); 
+ cp->shader_dispatch[slot].y = (u32)ceil_f32((f32)pb->parameters.channel_count / FILTER_LOCAL_SIZE_Y); + cp->shader_dispatch[slot].z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / FILTER_LOCAL_SIZE_Z); - b->data_kind = data_kind; - if (decode_first) - b->data_kind = BeamformerDataKind_Float32; + cp->rf_size = sample_count * pb->parameters.channel_count * pb->parameters.acquisition_count; + if (demodulate || run_cuda_hilbert) cp->rf_size *= 8; + else cp->rf_size *= 4; commit = 1; }break; case BeamformerShaderKind_DAS:{ - cp->das_bake.data_kind = BeamformerDataKind_Float32; - if (demodulate || run_cuda_hilbert) - cp->das_bake.data_kind = BeamformerDataKind_Float32Complex; - cp->shader_flags[cp->pipeline.shader_count] = das_flags; + bp->data_kind = BeamformerDataKind_Float32; + if (cp->iq_pipeline) + bp->data_kind = BeamformerDataKind_Float32Complex; + + BeamformerDASUBO *du = &cp->das_ubo_data; + du->voxel_transform = das_voxel_transform_matrix(&pb->parameters); + mem_copy(du->xdc_transform.E, pb->parameters.xdc_transform, sizeof(du->xdc_transform)); + mem_copy(du->xdc_element_pitch.E, pb->parameters.xdc_element_pitch, sizeof(du->xdc_element_pitch)); + bp->DAS.sampling_frequency = sampling_frequency; + bp->DAS.demodulation_frequency = pb->parameters.demodulation_frequency; + bp->DAS.speed_of_sound = pb->parameters.speed_of_sound; + bp->DAS.time_offset = time_offset; + bp->DAS.f_number = pb->parameters.f_number; + bp->DAS.acquisition_kind = pb->parameters.das_shader_id; + bp->DAS.sample_count = sample_count; + bp->DAS.channel_count = pb->parameters.channel_count; + bp->DAS.acquisition_count = pb->parameters.acquisition_count; + bp->DAS.interpolation_mode = pb->parameters.interpolation_mode; + bp->DAS.transmit_angle = pb->parameters.focal_vector[0]; + bp->DAS.focus_depth = pb->parameters.focal_vector[1]; + bp->DAS.transmit_receive_orientation = pb->parameters.transmit_receive_orientation; + + if (pb->parameters.single_focus) bp->flags |= 
BeamformerShaderDASFlags_SingleFocus; + if (pb->parameters.single_orientation) bp->flags |= BeamformerShaderDASFlags_SingleOrientation; + if (pb->parameters.coherency_weighting) bp->flags |= BeamformerShaderDASFlags_CoherencyWeighting; + else bp->flags |= BeamformerShaderDASFlags_Fast; + + u32 id = pb->parameters.das_shader_id; + if (id == BeamformerAcquisitionKind_UFORCES || id == BeamformerAcquisitionKind_UHERCULES) + bp->flags |= BeamformerShaderDASFlags_Sparse; + + if (id == BeamformerAcquisitionKind_HERO_PA) + bp->flags |= BeamformerShaderDASFlags_ReceiveOnly; + commit = 1; }break; default:{ commit = 1; }break; @@ -577,104 +649,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) } } cp->pipeline.data_kind = data_kind; - - u32 das_sample_stride = 1; - u32 das_transmit_stride = cp->das_bake.sample_count; - u32 das_channel_stride = cp->das_bake.acquisition_count * cp->das_bake.sample_count; - - u32 decimation_rate = MAX(pb->parameters.decimation_rate, 1); - if (demodulate) { - das_channel_stride /= (2 * decimation_rate); - das_transmit_stride /= (2 * decimation_rate); - } - - u32 input_sample_stride = 1; - u32 input_transmit_stride = cp->das_bake.sample_count; - u32 input_channel_stride = pb->parameters.raw_data_dimensions[0]; - - BeamformerShaderDecodeBakeParameters *dp = &cp->decode_bake; - dp->decode_mode = pb->parameters.decode_mode; - dp->transmit_count = cp->das_bake.acquisition_count; - - dp->input_sample_stride = decode_first? input_sample_stride : cp->das_bake.acquisition_count; - dp->input_channel_stride = decode_first? input_channel_stride : das_channel_stride; - dp->input_transmit_stride = decode_first? 
input_transmit_stride : 1; - dp->output_sample_stride = das_sample_stride; - dp->output_channel_stride = das_channel_stride; - dp->output_transmit_stride = das_transmit_stride; - if (decode_first) { - dp->output_channel_stride *= decimation_rate; - dp->output_transmit_stride *= decimation_rate; - } - - cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / DECODE_LOCAL_SIZE_X); - cp->decode_dispatch.y = (u32)ceil_f32((f32)cp->das_bake.channel_count / DECODE_LOCAL_SIZE_Y); - cp->decode_dispatch.z = (u32)ceil_f32((f32)cp->das_bake.acquisition_count / DECODE_LOCAL_SIZE_Z); - - /* NOTE(rnp): decode 2 samples per dispatch when data is i16 */ - if (decode_first && data_kind == BeamformerDataKind_Int16) - cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->decode_dispatch.x / 2); - - /* NOTE(rnp): when we are demodulating we pretend that the sampler was alternating - * between sampling the I portion and the Q portion of an IQ signal. Therefore there - * is an implicit decimation factor of 2 which must always be included. All code here - * assumes that the signal was sampled in such a way that supports this operation. 
- * To recover IQ[n] from the sampled data (RF[n]) we do the following: - * I[n] = RF[n] - * Q[n] = RF[n + 1] - * IQ[n] = I[n] - j*Q[n] - */ - if (demodulate) { - BeamformerShaderFilterBakeParameters *b = &cp->demodulate_bake; - b->demodulation_frequency = cp->das_bake.demodulation_frequency; - b->sampling_frequency = cp->das_bake.sampling_frequency / 2; - b->decimation_rate = decimation_rate; - - cp->das_bake.sampling_frequency /= 2 * (f32)b->decimation_rate; - cp->das_bake.sample_count /= 2 * b->decimation_rate; - - if (decode_first) { - b->input_channel_stride = dp->output_channel_stride; - b->input_sample_stride = dp->output_sample_stride; - b->input_transmit_stride = dp->output_transmit_stride; - - b->output_channel_stride = das_channel_stride; - b->output_sample_stride = das_sample_stride; - b->output_transmit_stride = das_transmit_stride; - } else { - b->input_channel_stride = input_channel_stride / 2; - b->input_sample_stride = input_sample_stride; - b->input_transmit_stride = input_transmit_stride / 2; - - /* NOTE(rnp): output optimized layout for decoding */ - b->output_channel_stride = dp->input_channel_stride; - b->output_sample_stride = dp->input_sample_stride; - b->output_transmit_stride = dp->input_transmit_stride; - - cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / DECODE_LOCAL_SIZE_X); - } - } - - /* TODO(rnp): filter may need a different dispatch layout */ - cp->demod_dispatch.x = (u32)ceil_f32((f32)cp->das_bake.sample_count / FILTER_LOCAL_SIZE_X); - cp->demod_dispatch.y = (u32)ceil_f32((f32)cp->das_bake.channel_count / FILTER_LOCAL_SIZE_Y); - cp->demod_dispatch.z = (u32)ceil_f32((f32)cp->das_bake.acquisition_count / FILTER_LOCAL_SIZE_Z); - - cp->rf_size = cp->das_bake.sample_count * cp->das_bake.channel_count * cp->das_bake.acquisition_count; - if (demodulate || run_cuda_hilbert) cp->rf_size *= 8; - else cp->rf_size *= 4; - - /* TODO(rnp): UBO per filter stage */ - BeamformerShaderFilterBakeParameters *fltb = 
&cp->filter_bake; - fltb->demodulation_frequency = cp->das_bake.demodulation_frequency; - fltb->sampling_frequency = cp->das_bake.sampling_frequency; - fltb->decimation_rate = 1; - fltb->output_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count; - fltb->output_sample_stride = 1; - fltb->output_transmit_stride = cp->das_bake.sample_count; - fltb->input_channel_stride = cp->das_bake.sample_count * cp->das_bake.acquisition_count; - fltb->input_sample_stride = 1; - fltb->input_transmit_stride = cp->das_bake.sample_count; } function void @@ -752,19 +726,11 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl if (beamformer_shader_bake_parameter_counts[reloadable_index]) { i32 count = beamformer_shader_bake_parameter_counts[reloadable_index]; - u32 *parameters = 0; - /* TODO(rnp): generate this */ - switch (shader) { - case BeamformerShaderKind_Decode:{ parameters = cp->decode_bake.E; }break; - case BeamformerShaderKind_Demodulate:{ parameters = cp->demodulate_bake.E; }break; - case BeamformerShaderKind_Filter:{ parameters = cp->filter_bake.E; }break; - case BeamformerShaderKind_DAS:{ parameters = cp->das_bake.E; }break; - default:{}break; - } + BeamformerShaderBakeParameters *bp = cp->shader_bake_parameters + shader_slot; - assert(parameters); - s8 *names = beamformer_shader_bake_parameter_names[reloadable_index]; - u8 *is_float = beamformer_shader_bake_parameter_is_float[reloadable_index]; + u32 *parameters = (u32 *)bp; + s8 *names = beamformer_shader_bake_parameter_names[reloadable_index]; + u8 *is_float = beamformer_shader_bake_parameter_is_float[reloadable_index]; for (i32 index = 0; index < count; index++) { stream_append_s8s(&shader_stream, s8("#define "), names[index], is_float[index]? 
s8(" uintBitsToFloat") : s8(" "), s8("(0x")); @@ -772,11 +738,13 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl stream_append_s8(&shader_stream, s8(")\n")); } - stream_append_byte(&shader_stream, '\n'); + stream_append_s8(&shader_stream, s8("#define DataKind (0x")); + stream_append_hex_u64(&shader_stream, bp->data_kind); + stream_append_s8(&shader_stream, s8(")\n\n")); s8 *flag_names = beamformer_shader_flag_strings[reloadable_index]; u32 flag_count = beamformer_shader_flag_strings_count[reloadable_index]; - u32 flags = cp->shader_flags[shader_slot]; + u32 flags = bp->flags; for (u32 bit = 0; bit < flag_count; bit++) { stream_append_s8s(&shader_stream, s8("#define "), flag_names[bit], (flags & (1 << bit))? s8(" 1") : s8(" 0"), s8("\n")); @@ -824,12 +792,15 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, BEAMFORMER_COMPUTE_UBO_LIST #undef X + cp->acquisition_count = pb->parameters.acquisition_count; + cp->acquisition_kind = pb->parameters.das_shader_id; + u32 decoded_data_size = cp->rf_size; if (ctx->compute_context.ping_pong_ssbo_size < decoded_data_size) alloc_shader_storage(ctx, decoded_data_size, arena); - if (cp->hadamard_order != (i32)cp->das_bake.acquisition_count) - update_hadamard_texture(cp, (i32)cp->das_bake.acquisition_count, arena); + if (cp->hadamard_order != (i32)cp->acquisition_count) + update_hadamard_texture(cp, (i32)cp->acquisition_count, arena); cp->min_coordinate = v3_from_f32_array(pb->parameters.output_min_coordinate); cp->max_coordinate = v3_from_f32_array(pb->parameters.output_max_coordinate); @@ -886,6 +857,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame u32 output_ssbo_idx = !cc->last_output_ssbo_index; u32 input_ssbo_idx = cc->last_output_ssbo_index; + uv3 dispatch = cp->shader_dispatch[shader_slot]; switch (shader) { case BeamformerShaderKind_Decode:{ glBindImageTexture(0, cp->textures[BeamformerComputeTextureKind_Hadamard], 0, 
0, 0, GL_READ_ONLY, GL_R32F); @@ -895,7 +867,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_ChannelMapping], 0, 0, 0, GL_READ_ONLY, GL_R16I); glProgramUniform1ui(program, DECODE_FIRST_PASS_UNIFORM_LOC, 1); - glDispatchCompute(cp->decode_dispatch.x, cp->decode_dispatch.y, cp->decode_dispatch.z); + glDispatchCompute(dispatch.x, dispatch.y, dispatch.z); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); } @@ -904,7 +876,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame glProgramUniform1ui(program, DECODE_FIRST_PASS_UNIFORM_LOC, 0); - glDispatchCompute(cp->decode_dispatch.x, cp->decode_dispatch.y, cp->decode_dispatch.z); + glDispatchCompute(dispatch.x, dispatch.y, dispatch.z); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); cc->last_output_ssbo_index = !cc->last_output_ssbo_index; @@ -920,7 +892,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame case BeamformerShaderKind_Filter: case BeamformerShaderKind_Demodulate: { - b32 map_channels = (cp->shader_flags[shader_slot] & BeamformerShaderFilterFlags_MapChannels) != 0; + b32 map_channels = (cp->shader_bake_parameters[shader_slot].flags & BeamformerShaderFilterFlags_MapChannels) != 0; glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[output_ssbo_idx]); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cp->filters[sp->filter_slot].ssbo); @@ -930,7 +902,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame else glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_ChannelMapping], 0, 0, 0, GL_READ_ONLY, GL_R16I); - glDispatchCompute(cp->demod_dispatch.x, cp->demod_dispatch.y, cp->demod_dispatch.z); + glDispatchCompute(dispatch.x, dispatch.y, dispatch.z); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); cc->last_output_ssbo_index = !cc->last_output_ssbo_index; @@ -951,9 +923,9 @@ 
do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame case BeamformerShaderKind_DAS:{ local_persist u32 das_cycle_t = 0; - u32 local_flags = cp->shader_flags[shader_slot]; - b32 fast = (local_flags & BeamformerShaderDASFlags_Fast) != 0; - b32 sparse = (local_flags & BeamformerShaderDASFlags_Sparse) != 0; + BeamformerShaderBakeParameters *bp = cp->shader_bake_parameters + shader_slot; + b32 fast = (bp->flags & BeamformerShaderDASFlags_Fast) != 0; + b32 sparse = (bp->flags & BeamformerShaderDASFlags_Sparse) != 0; if (fast) { glClearTexImage(frame->texture, 0, GL_RED, GL_FLOAT, 0); @@ -976,14 +948,14 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame if (fast) { i32 loop_end; - if (cp->das_bake.acquisition_kind == BeamformerAcquisitionKind_RCA_VLS || - cp->das_bake.acquisition_kind == BeamformerAcquisitionKind_RCA_TPW) + if (bp->DAS.acquisition_kind == BeamformerAcquisitionKind_RCA_VLS || + bp->DAS.acquisition_kind == BeamformerAcquisitionKind_RCA_TPW) { /* NOTE(rnp): to avoid repeatedly sampling the whole focal vectors * texture we loop over transmits for VLS/TPW */ - loop_end = (i32)cp->das_bake.acquisition_count; + loop_end = (i32)bp->DAS.acquisition_count; } else { - loop_end = (i32)cp->das_bake.channel_count; + loop_end = (i32)bp->DAS.channel_count; } f32 percent_per_step = 1.0f / (f32)loop_end; cc->processing_progress = -percent_per_step; @@ -1213,8 +1185,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c frame->min_coordinate = cp->min_coordinate; frame->max_coordinate = cp->max_coordinate; - frame->acquisition_kind = cp->das_bake.acquisition_kind; - frame->compound_count = cp->das_bake.acquisition_count; + frame->acquisition_kind = cp->acquisition_kind; + frame->compound_count = cp->acquisition_count; BeamformerComputeContext *cc = &ctx->compute_context; BeamformerComputePipeline *pipeline = &cp->pipeline; diff --git a/beamformer.h b/beamformer.h @@ -155,11 +155,11 
@@ struct BeamformerComputePlan { u32 programs[BeamformerMaxComputeShaderStages]; - uv3 decode_dispatch; - uv3 demod_dispatch; - u32 dirty_programs; + BeamformerAcquisitionKind acquisition_kind; + u32 acquisition_count; + u32 rf_size; i32 hadamard_order; b32 iq_pipeline; @@ -178,12 +178,8 @@ struct BeamformerComputePlan { BEAMFORMER_COMPUTE_UBO_LIST #undef X - BeamformerShaderDecodeBakeParameters decode_bake; - BeamformerShaderFilterBakeParameters demodulate_bake; - BeamformerShaderFilterBakeParameters filter_bake; - BeamformerShaderDASBakeParameters das_bake; - - u32 shader_flags[BeamformerMaxComputeShaderStages]; + BeamformerShaderBakeParameters shader_bake_parameters[BeamformerMaxComputeShaderStages]; + uv3 shader_dispatch[BeamformerMaxComputeShaderStages]; BeamformerComputePlan *next; }; diff --git a/beamformer.meta b/beamformer.meta @@ -63,7 +63,6 @@ @Bake { - @BakeInt(DataKind data_kind ) @BakeInt(DecodeMode decode_mode ) @BakeInt(InputChannelStride input_channel_stride ) @BakeInt(InputSampleStride input_sample_stride ) @@ -83,7 +82,6 @@ @Bake { - @BakeInt(DataKind data_kind ) @BakeInt(DecimationRate decimation_rate ) @BakeInt(FilterLength filter_length ) @BakeInt(InputChannelStride input_channel_stride ) @@ -113,7 +111,6 @@ @BakeInt(AcquisitionCount acquisition_count ) @BakeInt(AcquisitionKind acquisition_kind ) @BakeInt(ChannelCount channel_count ) - @BakeInt(DataKind data_kind ) @BakeInt(InterpolationMode interpolation_mode ) @BakeInt(SampleCount sample_count ) @BakeInt(TransmitReceiveOrientation transmit_receive_orientation) diff --git a/build.c b/build.c @@ -2147,20 +2147,40 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena) MetaShader *s = ctx->shaders.data + b->shader_id; s8 name = push_s8_from_parts(&m->scratch, s8(""), s8("BeamformerShader"), ctx->shader_names.data[s->name_id], s8("BakeParameters")); - meta_begin_scope(m, s8("typedef union {")); - meta_begin_scope(m, s8("struct {")); - for (u32 entry = 0; entry < b->entry_count; entry++) { - s8 
kind = b->floating_point[entry] ? s8("f32 ") : s8("u32 "); - meta_push_line(m, kind, b->names_lower[entry], s8(";")); - } - meta_end_scope(m, s8("};")); - meta_begin_line(m, s8("u32 E[")); - meta_push_u64(m, b->entry_count); - meta_end_line(m, s8("];")); + meta_begin_scope(m, s8("typedef struct {")); + for (u32 entry = 0; entry < b->entry_count; entry++) { + s8 kind = b->floating_point[entry] ? s8("f32 ") : s8("u32 "); + meta_push_line(m, kind, b->names_lower[entry], s8(";")); + } meta_end_scope(m, s8("} "), name, s8(";\n")); m->scratch = tmp; } + // shader bake parameter struct + meta_begin_scope(m, s8("typedef struct {")); + { + meta_begin_scope(m, s8("union {")); + { + Arena tmp = m->scratch; + s8 *columns[2]; + columns[0] = push_array(&m->scratch, s8, ctx->shader_bake_parameters.count); + columns[1] = push_array(&m->scratch, s8, ctx->shader_bake_parameters.count); + for (u32 bake = 0; bake < ctx->shader_bake_parameters.count; bake++) { + MetaShaderBakeParameters *b = ctx->shader_bake_parameters.data + bake; + MetaShader *s = ctx->shaders.data + b->shader_id; + columns[0][bake] = push_s8_from_parts(&m->scratch, s8(""), s8("BeamformerShader"), + ctx->shader_names.data[s->name_id], s8("BakeParameters")); + columns[1][bake] = ctx->shader_names.data[s->name_id]; + } + metagen_push_table(m, m->scratch, s8(""), s8(";"), columns, + (uz)ctx->shader_bake_parameters.count, 2); + m->scratch = tmp; + } meta_end_scope(m, s8("};")); + s8 names[] = {s8("data_kind"), s8("flags")}; + s8 types[] = {s8("u32"), s8("u32")}; + metagen_push_table(m, m->scratch, s8(""), s8(";"), (s8 *[]){types, names}, countof(names), 2); + } meta_end_scope(m, s8("} BeamformerShaderBakeParameters;\n")); + ///////////////////////////////// // NOTE(rnp): shader info tables meta_begin_scope(m, s8("read_only global s8 beamformer_shader_names[] = {")); diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -90,59 +90,57 @@ typedef enum { BeamformerShaderKind_RenderCount = 1, } 
BeamformerShaderKind; -typedef union { - struct { - u32 data_kind; - u32 decode_mode; - u32 input_channel_stride; - u32 input_sample_stride; - u32 input_transmit_stride; - u32 output_channel_stride; - u32 output_sample_stride; - u32 output_transmit_stride; - u32 transmit_count; - }; - u32 E[9]; +typedef struct { + u32 decode_mode; + u32 input_channel_stride; + u32 input_sample_stride; + u32 input_transmit_stride; + u32 output_channel_stride; + u32 output_sample_stride; + u32 output_transmit_stride; + u32 transmit_count; } BeamformerShaderDecodeBakeParameters; -typedef union { - struct { - u32 data_kind; - u32 decimation_rate; - u32 filter_length; - u32 input_channel_stride; - u32 input_sample_stride; - u32 input_transmit_stride; - u32 output_channel_stride; - u32 output_sample_stride; - u32 output_transmit_stride; - u32 sampling_mode; - f32 demodulation_frequency; - f32 sampling_frequency; - }; - u32 E[12]; +typedef struct { + u32 decimation_rate; + u32 filter_length; + u32 input_channel_stride; + u32 input_sample_stride; + u32 input_transmit_stride; + u32 output_channel_stride; + u32 output_sample_stride; + u32 output_transmit_stride; + u32 sampling_mode; + f32 demodulation_frequency; + f32 sampling_frequency; } BeamformerShaderFilterBakeParameters; -typedef union { - struct { - u32 acquisition_count; - u32 acquisition_kind; - u32 channel_count; - u32 data_kind; - u32 interpolation_mode; - u32 sample_count; - u32 transmit_receive_orientation; - f32 demodulation_frequency; - f32 f_number; - f32 focus_depth; - f32 sampling_frequency; - f32 speed_of_sound; - f32 time_offset; - f32 transmit_angle; - }; - u32 E[14]; +typedef struct { + u32 acquisition_count; + u32 acquisition_kind; + u32 channel_count; + u32 interpolation_mode; + u32 sample_count; + u32 transmit_receive_orientation; + f32 demodulation_frequency; + f32 f_number; + f32 focus_depth; + f32 sampling_frequency; + f32 speed_of_sound; + f32 time_offset; + f32 transmit_angle; } 
BeamformerShaderDASBakeParameters; +typedef struct { + union { + BeamformerShaderDecodeBakeParameters Decode; + BeamformerShaderFilterBakeParameters Filter; + BeamformerShaderDASBakeParameters DAS; + }; + u32 data_kind; + u32 flags; +} BeamformerShaderBakeParameters; + read_only global s8 beamformer_shader_names[] = { s8_comp("CudaDecode"), s8_comp("CudaHilbert"), @@ -288,7 +286,6 @@ read_only global i32 beamformer_shader_header_vector_lengths[] = { read_only global s8 *beamformer_shader_bake_parameter_names[] = { (s8 []){ - s8_comp("DataKind"), s8_comp("DecodeMode"), s8_comp("InputChannelStride"), s8_comp("InputSampleStride"), @@ -299,7 +296,6 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { s8_comp("TransmitCount"), }, (s8 []){ - s8_comp("DataKind"), s8_comp("DecimationRate"), s8_comp("FilterLength"), s8_comp("InputChannelStride"), @@ -316,7 +312,6 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { s8_comp("AcquisitionCount"), s8_comp("AcquisitionKind"), s8_comp("ChannelCount"), - s8_comp("DataKind"), s8_comp("InterpolationMode"), s8_comp("SampleCount"), s8_comp("TransmitReceiveOrientation"), @@ -334,18 +329,18 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { }; read_only global u8 *beamformer_shader_bake_parameter_is_float[] = { - (u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0}, - (u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - (u8 []){0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1}, + (u8 []){0, 0, 0, 0, 0, 0, 0, 0}, + (u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, + (u8 []){0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1}, 0, 0, 0, }; read_only global i32 beamformer_shader_bake_parameter_counts[] = { - 9, - 12, - 14, + 8, + 11, + 13, 0, 0, 0,