ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: c2a462b584b7196540a5f709f3c62245f84b2459
Parent: d401677f05f7c413d9ce3578b84e662573427b3a
Author: Randy Palamar
Date:   Sat, 23 Aug 2025 20:49:48 -0600

core/lib: rearrange DAS UBO; simplify library parameters struct

Diffstat:
M beamformer.c | 301 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M beamformer.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M beamformer_parameters.h | 109 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
M beamformer_shared_memory.c | 10 ++++++++--
M build.c | 22 +++++++++++++++++++++-
M math.c | 21 ++++++++++-----------
M shaders/das.glsl | 88 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M shaders/filter.glsl | 35 +++++++++++++++++++++++++++--------
M static.c | 10 +++-------
M tests/throughput.c | 60 ++++++++++++++++++++++++++++++++++++++----------------------
M ui.c | 27 +++++++++++++++------------
11 files changed, 451 insertions(+), 296 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -1,5 +1,9 @@ /* See LICENSE for license details. */ /* TODO(rnp): + * [ ]: filter shader specializations need to be generated per sample mode + * - performance was measured with a switch on sampling mode and the perfomance gained + * is 80% worse than just having a baked in sampling mode + * - should also include channel mapping just in case * [ ]: make decode output real values for real inputs and complex values for complex inputs * - this means that das should have a RF version and an IQ version * - this will also flip the current hack to support demodulate after decode to @@ -152,16 +156,6 @@ beamformer_filter_update(BeamformerFilter *f, BeamformerFilterKind kind, glObjectLabel(GL_TEXTURE, f->texture, (i32)label.len, (c8 *)label.data); } -function iv3 -make_valid_test_dim(i32 in[3]) -{ - iv3 result; - result.E[0] = MAX(in[0], 1); - result.E[1] = MAX(in[1], 1); - result.E[2] = MAX(in[2], 1); - return result; -} - function ComputeFrameIterator compute_frame_iterator(BeamformerCtx *ctx, u32 start_index, u32 needed_frames) { @@ -269,7 +263,8 @@ alloc_shader_storage(BeamformerCtx *ctx, u32 decoded_data_size, Arena arena) BeamformerParameterBlock *pb = beamformer_parameter_block(ctx->shared_memory.region, 0); /* NOTE(rnp): these are stubs when CUDA isn't supported */ cuda_register_buffers(cc->ping_pong_ssbos, countof(cc->ping_pong_ssbos), cc->rf_buffer.ssbo); - cuda_init(pb->parameters.rf_raw_dim, pb->parameters.dec_data_dim); + u32 decoded_data_dimension[3] = {pb->parameters.sample_count, pb->parameters.channel_count, pb->parameters.acquisition_count}; + cuda_init(pb->parameters.raw_data_dimensions, decoded_data_dimension); } function void @@ -384,10 +379,74 @@ compute_cursor_finished(struct compute_cursor *cursor) return result; } +function m4 +das_voxel_transform_matrix(BeamformerParameters *bp) +{ + v3 min = v3_from_f32_array(bp->output_min_coordinate); + v3 max = v3_from_f32_array(bp->output_max_coordinate); + v3 extent 
= v3_abs(v3_sub(max, min)); + v3 points = {{(f32)bp->output_points[0], (f32)bp->output_points[1], (f32)bp->output_points[2]}}; + + m4 T1 = m4_translation(v3_scale(v3_sub(points, (v3){{1.0f, 1.0f, 1.0f}}), -0.5f)); + m4 T2 = m4_translation(v3_add(min, v3_scale(extent, 0.5f))); + m4 S = m4_scale(v3_div(extent, points)); + + m4 R; + switch (bp->das_shader_id) { + case DASShaderKind_FORCES: + case DASShaderKind_UFORCES: + case DASShaderKind_Flash: + { + R = m4_identity(); + S.c[1].E[1] = 0; + T2.c[3].E[1] = 0; + }break; + case DASShaderKind_HERCULES: + case DASShaderKind_UHERCULES: + case DASShaderKind_RCA_TPW: + case DASShaderKind_RCA_VLS: + { + R = m4_rotation_about_z(bp->beamform_plane ? 0.0f : 0.25f); + if (!(points.x > 1 && points.y > 1 && points.z > 1)) + T2.c[3].E[1] = bp->off_axis_pos; + }break; + default:{ R = m4_identity(); }break; + } + m4 result = m4_mul(R, m4_mul(T2, m4_mul(S, T1))); + return result; +} + +function void +das_ubo_from_beamformer_parameters(BeamformerDASUBO *du, BeamformerParameters *bp) +{ + du->voxel_transform = das_voxel_transform_matrix(bp); + mem_copy(du->xdc_transform.E, bp->xdc_transform, sizeof(du->xdc_transform)); + mem_copy(du->xdc_element_pitch.E, bp->xdc_element_pitch, sizeof(du->xdc_element_pitch)); + du->sampling_frequency = bp->sampling_frequency; + du->demodulation_frequency = bp->demodulation_frequency; + du->speed_of_sound = bp->speed_of_sound; + du->time_offset = bp->time_offset; + du->f_number = bp->f_number; + du->shader_kind = bp->das_shader_id; + du->sample_count = bp->sample_count; + du->channel_count = bp->channel_count; + du->acquisition_count = bp->acquisition_count; + + du->shader_flags = 0; + if (bp->interpolate) du->shader_flags |= DASShaderFlags_Interpolate; + if (bp->coherency_weighting) du->shader_flags |= DASShaderFlags_CoherencyWeighting; + if (bp->transmit_mode == BeamformerTransmitMode_Columns) + du->shader_flags |= DASShaderFlags_TxColumns; + if (bp->receive_mode == BeamformerReceiveMode_Columns) + 
du->shader_flags |= DASShaderFlags_RxColumns; +} + function void plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) { - BeamformerParameters *bp = &cp->das_ubo_data; + BeamformerDASUBO *bp = &cp->das_ubo_data; + + das_ubo_from_beamformer_parameters(bp, &pb->parameters); b32 decode_first = pb->pipeline.shaders[0] == BeamformerShaderKind_Decode; b32 run_cuda_hilbert = 0; @@ -403,8 +462,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) if (demodulate) run_cuda_hilbert = 0; - mem_copy(bp, &pb->parameters, sizeof(*bp)); - BeamformerDataKind data_kind = pb->pipeline.data_kind; cp->pipeline.shader_count = 0; for (u32 i = 0; i < pb->pipeline.shader_count; i++) { @@ -452,7 +509,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) commit = 1; }break; case BeamformerShaderKind_DAS:{ - if (!bp->coherency_weighting) + if ((bp->shader_flags & DASShaderFlags_CoherencyWeighting) == 0) shader = BeamformerShaderKind_DASFast; commit = 1; }break; @@ -468,39 +525,39 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) cp->pipeline.data_kind = data_kind; u32 das_sample_stride = 1; - u32 das_transmit_stride = bp->dec_data_dim[0]; - u32 das_channel_stride = bp->dec_data_dim[2] * bp->dec_data_dim[0]; + u32 das_transmit_stride = bp->sample_count; + u32 das_channel_stride = bp->acquisition_count * bp->sample_count; - bp->decimation_rate = MAX(bp->decimation_rate, 1); + u32 decimation_rate = MAX(pb->parameters.decimation_rate, 1); if (demodulate) { - das_channel_stride /= (2 * bp->decimation_rate); - das_transmit_stride /= (2 * bp->decimation_rate); + das_channel_stride /= (2 * decimation_rate); + das_transmit_stride /= (2 * decimation_rate); } u32 input_sample_stride = 1; - u32 input_transmit_stride = bp->dec_data_dim[0]; - u32 input_channel_stride = bp->rf_raw_dim[0]; + u32 input_transmit_stride = bp->sample_count; + u32 input_channel_stride = 
pb->parameters.raw_data_dimensions[0]; BeamformerDecodeUBO *dp = &cp->decode_ubo_data; - dp->decode_mode = bp->decode; - dp->transmit_count = bp->dec_data_dim[2]; + dp->decode_mode = pb->parameters.decode; + dp->transmit_count = bp->acquisition_count; - dp->input_sample_stride = decode_first? input_sample_stride : bp->dec_data_dim[2]; + dp->input_sample_stride = decode_first? input_sample_stride : bp->acquisition_count; dp->input_channel_stride = decode_first? input_channel_stride : das_channel_stride; dp->input_transmit_stride = decode_first? input_transmit_stride : 1; dp->output_sample_stride = das_sample_stride; dp->output_channel_stride = das_channel_stride; dp->output_transmit_stride = das_transmit_stride; if (decode_first) { - dp->output_channel_stride *= bp->decimation_rate; - dp->output_transmit_stride *= bp->decimation_rate; + dp->output_channel_stride *= decimation_rate; + dp->output_transmit_stride *= decimation_rate; } - if (!demodulate) bp->center_frequency = 0; + if (!demodulate) bp->demodulation_frequency = 0; - cp->decode_dispatch.x = (u32)ceil_f32((f32)bp->dec_data_dim[0] / DECODE_LOCAL_SIZE_X); - cp->decode_dispatch.y = (u32)ceil_f32((f32)bp->dec_data_dim[1] / DECODE_LOCAL_SIZE_Y); - cp->decode_dispatch.z = (u32)ceil_f32((f32)bp->dec_data_dim[2] / DECODE_LOCAL_SIZE_Z); + cp->decode_dispatch.x = (u32)ceil_f32((f32)bp->sample_count / DECODE_LOCAL_SIZE_X); + cp->decode_dispatch.y = (u32)ceil_f32((f32)bp->channel_count / DECODE_LOCAL_SIZE_Y); + cp->decode_dispatch.z = (u32)ceil_f32((f32)bp->acquisition_count / DECODE_LOCAL_SIZE_Z); /* NOTE(rnp): decode 2 samples per dispatch when data is i16 */ if (decode_first && data_kind == BeamformerDataKind_Int16) @@ -517,13 +574,14 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) */ if (demodulate) { BeamformerFilterUBO *mp = &cp->demod_ubo_data; - mp->demodulation_frequency = bp->center_frequency; + mp->demodulation_frequency = bp->demodulation_frequency; mp->sampling_frequency 
= bp->sampling_frequency / 2; - mp->decimation_rate = bp->decimation_rate; - mp->map_channels = !decode_first; + mp->decimation_rate = decimation_rate; + + if (!decode_first) mp->shader_flags |= FilterShaderFlags_MapChannels; bp->sampling_frequency /= 2 * (f32)mp->decimation_rate; - bp->dec_data_dim[0] /= 2 * mp->decimation_rate; + bp->sample_count /= 2 * mp->decimation_rate; if (decode_first) { mp->input_channel_stride = dp->output_channel_stride; @@ -543,30 +601,30 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) mp->output_sample_stride = dp->input_sample_stride; mp->output_transmit_stride = dp->input_transmit_stride; - cp->decode_dispatch.x = (u32)ceil_f32((f32)bp->dec_data_dim[0] / DECODE_LOCAL_SIZE_X); + cp->decode_dispatch.x = (u32)ceil_f32((f32)bp->sample_count / DECODE_LOCAL_SIZE_X); } } /* TODO(rnp): filter may need a different dispatch layout */ - cp->demod_dispatch.x = (u32)ceil_f32((f32)bp->dec_data_dim[0] / FILTER_LOCAL_SIZE_X); - cp->demod_dispatch.y = (u32)ceil_f32((f32)bp->dec_data_dim[1] / FILTER_LOCAL_SIZE_Y); - cp->demod_dispatch.z = (u32)ceil_f32((f32)bp->dec_data_dim[2] / FILTER_LOCAL_SIZE_Z); + cp->demod_dispatch.x = (u32)ceil_f32((f32)bp->sample_count / FILTER_LOCAL_SIZE_X); + cp->demod_dispatch.y = (u32)ceil_f32((f32)bp->channel_count / FILTER_LOCAL_SIZE_Y); + cp->demod_dispatch.z = (u32)ceil_f32((f32)bp->acquisition_count / FILTER_LOCAL_SIZE_Z); /* TODO(rnp): if IQ (* 8) else (* 4) */ - cp->rf_size = bp->dec_data_dim[0] * bp->dec_data_dim[1] * bp->dec_data_dim[2] * 8; + cp->rf_size = bp->sample_count * bp->channel_count * bp->acquisition_count * 8; /* TODO(rnp): UBO per filter stage */ BeamformerFilterUBO *flt = &cp->filter_ubo_data; - flt->demodulation_frequency = bp->center_frequency; + flt->demodulation_frequency = bp->demodulation_frequency; flt->sampling_frequency = bp->sampling_frequency; flt->decimation_rate = 1; - flt->map_channels = 0; - flt->output_channel_stride = bp->dec_data_dim[0] * 
bp->dec_data_dim[2]; + flt->shader_flags = pb->parameters.sampling_mode & FilterShaderFlags_SamplingModeMask; + flt->output_channel_stride = bp->sample_count * bp->acquisition_count; flt->output_sample_stride = 1; - flt->output_transmit_stride = bp->dec_data_dim[0]; - flt->input_channel_stride = bp->dec_data_dim[0] * bp->dec_data_dim[2]; + flt->output_transmit_stride = bp->sample_count; + flt->input_channel_stride = bp->sample_count * bp->acquisition_count; flt->input_sample_stride = 1; - flt->input_transmit_stride = bp->dec_data_dim[0]; + flt->input_transmit_stride = bp->sample_count; } function void @@ -594,13 +652,28 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, BEAMFORMER_COMPUTE_UBO_LIST #undef X - u32 *dec_data_dim = pb->parameters.dec_data_dim; - u32 decoded_data_size = (u32)(2 * sizeof(f32) * dec_data_dim[0] * dec_data_dim[1] * dec_data_dim[2]); + u32 samples = pb->parameters.sample_count; + u32 channels = pb->parameters.channel_count; + u32 acquisitions = pb->parameters.acquisition_count; + u32 decoded_data_size = (u32)(2 * sizeof(f32) * samples * channels * acquisitions); if (ctx->compute_context.ping_pong_ssbo_size < decoded_data_size) alloc_shader_storage(ctx, decoded_data_size, arena); - if (cp->hadamard_order != (i32)cp->das_ubo_data.dec_data_dim[2]) - update_hadamard_texture(cp, (i32)cp->das_ubo_data.dec_data_dim[2], arena); + if (cp->hadamard_order != (i32)cp->das_ubo_data.acquisition_count) + update_hadamard_texture(cp, (i32)cp->das_ubo_data.acquisition_count, arena); + + cp->min_coordinate = v3_from_f32_array(pb->parameters.output_min_coordinate); + cp->max_coordinate = v3_from_f32_array(pb->parameters.output_max_coordinate); + + cp->output_points.E[0] = MAX(pb->parameters.output_points[0], 1); + cp->output_points.E[1] = MAX(pb->parameters.output_points[1], 1); + cp->output_points.E[2] = MAX(pb->parameters.output_points[2], 1); + cp->average_frames = pb->parameters.output_points[3]; + + if (cp->average_frames 
> 1 && !iv3_equal(cp->output_points, ctx->averaged_frames[0].dim)) { + alloc_beamform_frame(&ctx->gl, ctx->averaged_frames + 0, cp->output_points, s8("Averaged Frame"), arena); + alloc_beamform_frame(&ctx->gl, ctx->averaged_frames + 1, cp->output_points, s8("Averaged Frame"), arena); + } }break; case BeamformerParameterBlockRegion_ChannelMapping: case BeamformerParameterBlockRegion_FocalVectors: @@ -638,43 +711,6 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, beamformer_parameter_block_unlock(&ctx->shared_memory, block); } -function m4 -das_voxel_transform_matrix(BeamformerParameters *bp) -{ - v3 min = v4_from_f32_array(bp->output_min_coordinate).xyz; - v3 max = v4_from_f32_array(bp->output_max_coordinate).xyz; - v3 extent = v3_abs(v3_sub(max, min)); - v3 points = {{(f32)bp->output_points[0], (f32)bp->output_points[1], (f32)bp->output_points[2]}}; - - m4 T1 = m4_translation(v3_scale(v3_sub(points, (v3){{1.0f, 1.0f, 1.0f}}), -0.5f)); - m4 T2 = m4_translation(v3_add(min, v3_scale(extent, 0.5f))); - m4 S = m4_scale(v3_div(extent, points)); - - m4 R; - switch (bp->das_shader_id) { - case DASShaderKind_FORCES: - case DASShaderKind_UFORCES: - case DASShaderKind_FLASH: - { - R = m4_identity(); - S.c[1].E[1] = 0; - T2.c[3].E[1] = 0; - }break; - case DASShaderKind_HERCULES: - case DASShaderKind_UHERCULES: - case DASShaderKind_RCA_TPW: - case DASShaderKind_RCA_VLS: - { - R = m4_rotation_about_z(bp->beamform_plane ? 
0.0f : 0.25f); - if (!(points.x > 1 && points.y > 1 && points.z > 1)) - T2.c[3].E[1] = bp->off_axis_pos; - }break; - default:{ R = m4_identity(); }break; - } - m4 result = m4_mul(R, m4_mul(T2, m4_mul(S, T1))); - return result; -} - function void do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame *frame, BeamformerShaderKind shader, BeamformerShaderParameters *sp, Arena arena) @@ -739,12 +775,12 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame : BeamformerComputeUBOKind_Demodulate; glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[index]); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[output_ssbo_idx]); - if (!ubo->map_channels) + if ((ubo->shader_flags & FilterShaderFlags_MapChannels) == 0) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx]); GLenum kind = cp->filters[sp->filter_slot].parameters.complex? GL_RG32F : GL_R32F; glBindImageTexture(0, cp->filters[sp->filter_slot].texture, 0, 0, 0, GL_READ_ONLY, kind); - if (ubo->map_channels) + if (ubo->shader_flags & FilterShaderFlags_MapChannels) glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_ChannelMapping], 0, 0, 0, GL_READ_ONLY, GL_R16I); glDispatchCompute(cp->demod_dispatch.x, cp->demod_dispatch.y, cp->demod_dispatch.z); @@ -768,7 +804,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame case BeamformerShaderKind_DAS: case BeamformerShaderKind_DASFast: { - BeamformerParameters *ubo = &cp->das_ubo_data; + BeamformerDASUBO *ubo = &cp->das_ubo_data; if (shader == BeamformerShaderKind_DASFast) { glClearTexImage(frame->texture, 0, GL_RED, GL_FLOAT, 0); glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT); @@ -782,20 +818,18 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame glBindImageTexture(1, cp->textures[BeamformerComputeTextureKind_SparseElements], 0, 0, 0, GL_READ_ONLY, GL_R16I); glBindImageTexture(2, 
cp->textures[BeamformerComputeTextureKind_FocalVectors], 0, 0, 0, GL_READ_ONLY, GL_RG32F); - m4 voxel_transform = das_voxel_transform_matrix(ubo); glProgramUniform1ui(program, DAS_CYCLE_T_UNIFORM_LOC, cycle_t++); - glProgramUniformMatrix4fv(program, DAS_VOXEL_MATRIX_LOC, 1, 0, voxel_transform.E); if (shader == BeamformerShaderKind_DASFast) { i32 loop_end; - if (ubo->das_shader_id == DASShaderKind_RCA_VLS || - ubo->das_shader_id == DASShaderKind_RCA_TPW) + if (ubo->shader_kind == DASShaderKind_RCA_VLS || + ubo->shader_kind == DASShaderKind_RCA_TPW) { /* NOTE(rnp): to avoid repeatedly sampling the whole focal vectors * texture we loop over transmits for VLS/TPW */ - loop_end = (i32)ubo->dec_data_dim[2]; + loop_end = (i32)ubo->acquisition_count; } else { - loop_end = (i32)ubo->dec_data_dim[1]; + loop_end = (i32)ubo->channel_count; } f32 percent_per_step = 1.0f / (f32)loop_end; cc->processing_progress = -percent_per_step; @@ -849,7 +883,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame assert(frame >= ctx->beamform_frames); assert(frame < ctx->beamform_frames + countof(ctx->beamform_frames)); u32 base_index = (u32)(frame - ctx->beamform_frames); - u32 to_average = (u32)cp->das_ubo_data.output_points[3]; + u32 to_average = (u32)cp->average_frames; u32 frame_count = 0; u32 *in_textures = push_array(&arena, u32, BeamformerMaxSavedFrames); ComputeFrameIterator cfi = compute_frame_iterator(ctx, 1 + base_index - to_average, to_average); @@ -882,15 +916,21 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena) case BeamformerShaderKind_DemodulateFloat: case BeamformerShaderKind_DemodulateFloatCF: { + stream_append_s8(&sb, s8("" + "layout(local_size_x = " str(FILTER_LOCAL_SIZE_X) ", " + "local_size_y = " str(FILTER_LOCAL_SIZE_Y) ", " + "local_size_z = " str(FILTER_LOCAL_SIZE_Z) ") in;\n\n" + )); + switch (ctx->kind) { case BeamformerShaderKind_FilterCF: case BeamformerShaderKind_DemodulateCF: case 
BeamformerShaderKind_DemodulateFloatCF: { - stream_append_s8(&sb, s8("#define COMPLEX_FILTER 1\n\n")); + stream_append_s8(&sb, s8("#define COMPLEX_FILTER 1\n")); }break; default:{ - stream_append_s8(&sb, s8("#define COMPLEX_FILTER 0\n\n")); + stream_append_s8(&sb, s8("#define COMPLEX_FILTER 0\n")); }break; } @@ -900,7 +940,7 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena) case BeamformerShaderKind_DemodulateFloat: case BeamformerShaderKind_DemodulateFloatCF: { - stream_append_s8(&sb, s8("#define INPUT_DATA_TYPE_FLOAT\n\n")); + stream_append_s8(&sb, s8("#define INPUT_DATA_TYPE_FLOAT\n")); }break; default:{}break; } @@ -911,16 +951,21 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena) case BeamformerShaderKind_DemodulateFloat: case BeamformerShaderKind_DemodulateFloatCF: { - stream_append_s8(&sb, s8("#define DEMODULATE\n\n")); + stream_append_s8(&sb, s8("#define DEMODULATE\n")); }break; default:{}break; } - stream_append_s8(&sb, s8("" - "layout(local_size_x = " str(FILTER_LOCAL_SIZE_X) ", " - "local_size_y = " str(FILTER_LOCAL_SIZE_Y) ", " - "local_size_z = " str(FILTER_LOCAL_SIZE_Z) ") in;\n\n" - )); + stream_append_byte(&sb, '\n'); + #define X(k, v, ...) "#define ShaderFlags_" #k " " #v "\n" + stream_append_s8(&sb, s8(FILTER_SHADER_FLAGS_LIST)); + #undef X + + stream_append_byte(&sb, '\n'); + #define X(k, v, ...) 
"#define SamplingMode_" #k " " #v "\n" + stream_append_s8(&sb, s8(SAMPLING_MODES_LIST)); + #undef X + stream_append_byte(&sb, '\n'); }break; case BeamformerShaderKind_DAS: case BeamformerShaderKind_DASFast: @@ -942,12 +987,17 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena) "layout(location = " str(DAS_FAST_CHANNEL_UNIFORM_LOC) ") uniform int u_channel;\n" )); } - #define X(type, id, pretty, fixed_tx) "#define DAS_ID_" #type " " #id "\n" stream_append_s8(&sb, s8("" - "layout(location = " str(DAS_VOXEL_MATRIX_LOC) ") uniform mat4 u_voxel_transform;\n" - "layout(location = " str(DAS_CYCLE_T_UNIFORM_LOC) ") uniform uint u_cycle_t;\n\n" - DAS_TYPES - )); + "layout(location = " str(DAS_CYCLE_T_UNIFORM_LOC) ") uniform uint u_cycle_t;\n\n")); + + #define X(k, v, ...) "#define ShaderFlags_" #k " " #v "\n" + stream_append_s8(&sb, s8(DAS_SHADER_FLAGS_LIST)); + #undef X + + stream_append_byte(&sb, '\n'); + + #define X(k, id, ...) "#define ShaderKind_" #k " " #id "\n" + stream_append_s8(&sb, s8(DAS_SHADER_KIND_LIST)); #undef X }break; case BeamformerShaderKind_Decode: @@ -1053,7 +1103,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c }break; case BeamformerShaderKind_Decode:{ read_only local_persist struct { BeamformerShaderKind kind; s8 suffix; } derivatives[] = { - #define X(k, __1, __2, suffix, ...) {BeamformerShaderKind_## k, s8(suffix)}, + #define X(k, __1, __2, suffix, ...) {BeamformerShaderKind_## k, s8_comp(suffix)}, DECODE_SHADER_VARIATIONS #undef X }; @@ -1067,8 +1117,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c }break; case BeamformerShaderKind_Filter:{ read_only local_persist struct { BeamformerShaderKind kind; s8 suffix; } derivatives[] = { - {BeamformerShaderKind_Demodulate, s8(" (Demodulate)")}, - #define X(k, __1, __2, suffix, ...) 
{BeamformerShaderKind_## k, s8(suffix)}, + {BeamformerShaderKind_Demodulate, s8_comp(" (Demodulate)")}, + #define X(k, __1, __2, suffix, ...) {BeamformerShaderKind_## k, s8_comp(suffix)}, FILTER_SHADER_VARIATIONS #undef X }; @@ -1140,9 +1190,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c push_compute_timing_info(ctx->compute_timing_table, (ComputeTimingInfo){.kind = ComputeTimingInfoKind_ComputeFrameBegin}); - BeamformerComputePlan *cp = beamformer_compute_plan_for_block(cs, work->compute_context.parameter_block, arena); - BeamformerParameterBlock *pb = beamformer_parameter_block(sm, work->compute_context.parameter_block); - if (pb->dirty_regions) { + BeamformerComputePlan *cp = beamformer_compute_plan_for_block(cs, work->compute_context.parameter_block, arena); + if (beamformer_parameter_block_dirty(sm, work->compute_context.parameter_block)) { u32 block = work->compute_context.parameter_block; beamformer_commit_parameter_block(ctx, cp, block, *arena); atomic_store_u32(&ctx->ui_dirty_parameter_blocks, (u32)(ctx->beamform_work_queue != q) << block); @@ -1154,19 +1203,13 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c start_renderdoc_capture(gl_context); BeamformerFrame *frame = work->compute_context.frame; - iv3 try_dim = make_valid_test_dim(cp->das_ubo_data.output_points); - if (!iv3_equal(try_dim, frame->dim)) - alloc_beamform_frame(&ctx->gl, frame, try_dim, s8("Beamformed_Data"), *arena); - - if (cp->das_ubo_data.output_points[3] > 1 && !iv3_equal(try_dim, ctx->averaged_frames[0].dim)) { - alloc_beamform_frame(&ctx->gl, ctx->averaged_frames + 0, try_dim, s8("Averaged Frame"), *arena); - alloc_beamform_frame(&ctx->gl, ctx->averaged_frames + 1, try_dim, s8("Averaged Frame"), *arena); - } + if (!iv3_equal(cp->output_points, frame->dim)) + alloc_beamform_frame(&ctx->gl, frame, cp->output_points, s8("Beamformed_Data"), *arena); - frame->min_coordinate = 
v4_from_f32_array(cp->das_ubo_data.output_min_coordinate); - frame->max_coordinate = v4_from_f32_array(cp->das_ubo_data.output_max_coordinate); - frame->das_shader_kind = cp->das_ubo_data.das_shader_id; - frame->compound_count = cp->das_ubo_data.dec_data_dim[2]; + frame->min_coordinate = cp->min_coordinate; + frame->max_coordinate = cp->max_coordinate; + frame->das_shader_kind = cp->das_ubo_data.shader_kind; + frame->compound_count = cp->das_ubo_data.acquisition_count; BeamformerComputeContext *cc = &ctx->compute_context; BeamformerComputePipeline *pipeline = &cp->pipeline; diff --git a/beamformer.h b/beamformer.h @@ -105,6 +105,24 @@ typedef struct { u32 texture; } BeamformerFilter; +#define DAS_SHADER_FLAGS_LIST \ + X(RxColumns, (1 << 0)) \ + X(TxColumns, (1 << 1)) \ + X(Interpolate, (1 << 2)) \ + X(CoherencyWeighting, (1 << 3)) + +#define X(k, v, ...) DASShaderFlags_## k = v, +typedef enum {DAS_SHADER_FLAGS_LIST} DASShaderFlags; +#undef X + +static_assert(BeamformerSamplingMode_Count < 4, "filter sample mode mask borked"); +#define FILTER_SHADER_FLAGS_LIST \ + X(SamplingModeMask, ((1 << 0) | (1 << 1))) \ + X(MapChannels, (1 << 2)) +#define X(k, v, ...) 
FilterShaderFlags_## k = v, +typedef enum {FILTER_SHADER_FLAGS_LIST} FilterShaderFlags; +#undef X + /* X(name, type, gltype) */ #define BEAMFORMER_FILTER_UBO_PARAM_LIST \ X(input_channel_stride, u32, uint) \ @@ -114,7 +132,7 @@ typedef struct { X(output_sample_stride, u32, uint) \ X(output_transmit_stride, u32, uint) \ X(decimation_rate, u32, uint) \ - X(map_channels, b32, bool) \ + X(shader_flags, u32, int) \ X(demodulation_frequency, f32, float) \ X(sampling_frequency, f32, float) @@ -129,6 +147,22 @@ typedef struct { X(transmit_count, u32, uint) \ X(decode_mode, u32, uint) +/* X(name, type, gltype) */ +#define BEAMFORMER_DAS_UBO_PARAM_LIST \ + X(voxel_transform, m4, mat4) \ + X(xdc_transform, m4, mat4) \ + X(xdc_element_pitch, v2, vec2) \ + X(sampling_frequency, f32, float) \ + X(demodulation_frequency, f32, float) \ + X(speed_of_sound, f32, float) \ + X(time_offset, f32, float) \ + X(f_number, f32, float) \ + X(shader_flags, u32, int) \ + X(shader_kind, u32, uint) \ + X(sample_count, u32, uint) \ + X(channel_count, u32, uint) \ + X(acquisition_count, u32, uint) + typedef alignas(16) struct { #define X(name, type, ...) type name; BEAMFORMER_DECODE_UBO_PARAM_LIST @@ -144,13 +178,20 @@ typedef alignas(16) struct { } BeamformerFilterUBO; static_assert((sizeof(BeamformerFilterUBO) & 15) == 0, "UBO size must be a multiple of 16"); +typedef alignas(16) struct { + #define X(name, type, ...) 
type name; + BEAMFORMER_DAS_UBO_PARAM_LIST + #undef X +} BeamformerDASUBO; +static_assert((sizeof(BeamformerDASUBO) & 15) == 0, "UBO size must be a multiple of 16"); + /* TODO(rnp): das should remove redundant info and add voxel transform */ /* TODO(rnp): need 1 UBO per filter slot */ #define BEAMFORMER_COMPUTE_UBO_LIST \ - X(DAS, BeamformerParameters, das) \ - X(Decode, BeamformerDecodeUBO, decode) \ - X(Filter, BeamformerFilterUBO, filter) \ - X(Demodulate, BeamformerFilterUBO, demod) + X(DAS, BeamformerDASUBO, das) \ + X(Decode, BeamformerDecodeUBO, decode) \ + X(Filter, BeamformerFilterUBO, filter) \ + X(Demodulate, BeamformerFilterUBO, demod) #define X(k, ...) BeamformerComputeUBOKind_##k, typedef enum {BEAMFORMER_COMPUTE_UBO_LIST BeamformerComputeUBOKind_Count} BeamformerComputeUBOKind; @@ -181,6 +222,11 @@ struct BeamformerComputePlan { u32 rf_size; i32 hadamard_order; + v3 min_coordinate; + v3 max_coordinate; + iv3 output_points; + i32 average_frames; + u32 textures[BeamformerComputeTextureKind_Count]; u32 ubos[BeamformerComputeUBOKind_Count]; @@ -229,8 +275,8 @@ typedef struct { } BeamformerComputeContext; typedef enum { - #define X(type, id, pretty, fixed_tx) DASShaderKind_##type = id, - DAS_TYPES + #define X(type, id, ...) DASShaderKind_##type = id, + DAS_SHADER_KIND_LIST #undef X DASShaderKind_Count } DASShaderKind; @@ -285,8 +331,8 @@ struct BeamformerFrame { /* NOTE: for use when displaying either prebeamformed frames or on the current frame * when we intend to recompute on the next frame */ - v4 min_coordinate; - v4 max_coordinate; + v3 min_coordinate; + v3 max_coordinate; // metadata u32 id; diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -2,7 +2,7 @@ #include <stdint.h> /* TODO(rnp): - * [ ]: Have a method for the library caller to take ownership of a "compute context" + * [ ]: shader kinds have ballooned; shader stats table needs to be compressed * [ ]: Upload previously exported data for display. 
maybe this is a UI thing but doing it * programatically would be nice. * [ ]: Add interface for multi frame upload. RF upload already uses an offset into SM so @@ -61,6 +61,31 @@ typedef struct { X(NONE, 0, "None") \ X(HADAMARD, 1, "Hadamard") +#define SAMPLING_MODES_LIST \ + X(NS200BW, 0) \ + X(BS100BW, 1) \ + X(BS50BW, 2) + +#define TRANSMIT_MODES_LIST \ + X(Rows) \ + X(Columns) + +#define RECEIVE_MODES_LIST \ + X(Rows) \ + X(Columns) + +#define X(k, ...) BeamformerTransmitMode_## k, +typedef enum {TRANSMIT_MODES_LIST} BeamformerTransmitModes; +#undef X + +#define X(k, ...) BeamformerReceiveMode_## k, +typedef enum {RECEIVE_MODES_LIST} BeamformerReceiveModes; +#undef X + +#define X(k, v, ...) BeamformerSamplingMode_## k = v, +typedef enum {SAMPLING_MODES_LIST BeamformerSamplingMode_Count} BeamformerSamplingModes; +#undef X + #define BEAMFORMER_DATA_KIND_LIST \ X(Int16, 0) \ X(Int16Complex, 1) \ @@ -96,7 +121,7 @@ typedef enum { } BeamformerViewPlaneTag; /* X(type, id, pretty name, fixed transmits) */ -#define DAS_TYPES \ +#define DAS_SHADER_KIND_LIST \ X(FORCES, 0, "FORCES", 1) \ X(UFORCES, 1, "UFORCES", 0) \ X(HERCULES, 2, "HERCULES", 1) \ @@ -107,7 +132,7 @@ typedef enum { X(EPIC_FORCES, 7, "EPIC-FORCES", 1) \ X(EPIC_UFORCES, 8, "EPIC-UFORCES", 0) \ X(EPIC_UHERCULES, 9, "EPIC-UHERCULES", 0) \ - X(FLASH, 10, "Flash", 0) + X(Flash, 10, "Flash", 0) #define FILTER_LOCAL_SIZE_X 64 #define FILTER_LOCAL_SIZE_Y 1 @@ -129,8 +154,7 @@ typedef enum { #define DAS_VOXEL_OFFSET_UNIFORM_LOC 2 #define DAS_CYCLE_T_UNIFORM_LOC 3 -#define DAS_VOXEL_MATRIX_LOC 4 -#define DAS_FAST_CHANNEL_UNIFORM_LOC 5 +#define DAS_FAST_CHANNEL_UNIFORM_LOC 4 #define MIN_MAX_MIPS_LEVEL_UNIFORM_LOC 1 #define SUM_PRESCALE_UNIFORM_LOC 1 @@ -146,58 +170,45 @@ typedef enum { enum {BEAMFORMER_CONSTANTS_LIST}; #undef X -/* TODO(rnp): actually use a substruct but generate a header compatible with MATLAB */ -/* X(name, type, size, elements, gltype, glsize, comment) */ +/* X(name, type, size, elements, 
comment) */ +#define BEAMFORMER_PARAMS_HEAD \ + X(xdc_transform, float, [16], 16, "IMPORTANT: column major order") \ + X(xdc_element_pitch, float, [2], 2, "[m] Transducer Element Pitch {row, col}") \ + X(raw_data_dimensions, uint32_t, [2], 2, "Raw Data Dimensions") \ + X(sample_count, uint32_t, , 1, "") \ + X(channel_count, uint32_t, , 1, "") \ + X(acquisition_count, uint32_t, , 1, "") \ + X(das_shader_id, uint32_t, , 1, "") \ + X(time_offset, float, , 1, "pulse length correction time [s]") \ + X(decode, uint8_t, , 1, "Decode or just reshape data") \ + X(transmit_mode, uint8_t, , 1, "Method/Orientation of Transmit") \ + X(receive_mode, uint8_t, , 1, "Method/Orientation of Receive") \ + X(sampling_mode, uint8_t, , 1, "") + #define BEAMFORMER_UI_PARAMS \ - X(output_min_coordinate, float, [4], 4, vec4, , "/* [m] Back-Top-Left corner of output region */") \ - X(output_max_coordinate, float, [4], 4, vec4, , "/* [m] Front-Bottom-Right corner of output region */") \ - X(output_points, int32_t, [4], 4, uvec4, , "/* Width * Height * Depth * (Frame Average Count) */") \ - X(sampling_frequency, float, , 1, float, , "/* [Hz] */") \ - X(center_frequency, float, , 1, float, , "/* [Hz] */") \ - X(speed_of_sound, float, , 1, float, , "/* [m/s] */") \ - X(off_axis_pos, float, , 1, float, , "/* [m] Position on screen normal to beamform in TPW/VLSHERCULES */") \ - X(beamform_plane, int32_t, , 1, int, , "/* Plane to Beamform in TPW/VLS/HERCULES */") \ - X(f_number, float, , 1, float, , "/* F# (set to 0 to disable) */") \ - X(interpolate, uint32_t, , 1, bool, , "/* Perform Cubic Interpolation of RF Samples */") \ - X(coherency_weighting, uint32_t, , 1, bool, , "/* Apply coherency weighting to output data */") \ - X(decimation_rate, uint32_t, , 1, uint, , "/* Number of times to decimate */") + X(output_min_coordinate, float, [3], 3, "[m] Back-Top-Left corner of output region") \ + X(output_max_coordinate, float, [3], 3, "[m] Front-Bottom-Right corner of output region") \ + 
X(output_points, int32_t, [4], 4, "Width * Height * Depth * (Frame Average Count)") \ + X(sampling_frequency, float, , 1, "[Hz]") \ + X(demodulation_frequency, float, , 1, "[Hz]") \ + X(speed_of_sound, float, , 1, "[m/s]") \ + X(f_number, float, , 1, "F# (set to 0 to disable)") \ + X(off_axis_pos, float, , 1, "[m] Position on screen normal to beamform in TPW/VLS/HERCULES") \ + X(interpolate, uint32_t, , 1, "Perform Cubic Interpolation of RF Samples") \ + X(coherency_weighting, uint32_t, , 1, "Apply coherency weighting to output data") \ + X(beamform_plane, uint32_t, , 1, "Plane to Beamform in TPW/VLS/HERCULES") \ + X(decimation_rate, uint32_t, , 1, "Number of times to decimate") + +#define X(name, type, size, ...) type name size; +typedef struct {BEAMFORMER_PARAMS_HEAD} BeamformerParametersHead; +typedef struct {BEAMFORMER_UI_PARAMS} BeamformerUIParameters; -#define BEAMFORMER_PARAMS_HEAD \ - X(xdc_transform, float, [16], 16, mat4, , "/* IMPORTANT: column major order */") \ - X(dec_data_dim, uint32_t, [4] , 4, ivec4, , "/* Samples * Channels * Acquisitions; last element ignored */") \ - X(xdc_element_pitch, float, [2] , 2, vec2, , "/* [m] Transducer Element Pitch {row, col} */") \ - X(rf_raw_dim, uint32_t, [2] , 2, ivec2, , "/* Raw Data Dimensions */") \ - X(transmit_mode, int32_t, , 1, int, , "/* Method/Orientation of Transmit */") \ - X(decode, uint32_t, , 1, uint, , "/* Decode or just reshape data */") \ - X(das_shader_id, uint32_t, , 1, uint, , "") \ - X(time_offset, float, , 1, float, , "/* pulse length correction time [s] */") - -#define BEAMFORMER_PARAMS_TAIL \ - X(readi_group_id, uint32_t, , 1, uint, , "/* Which readi group this data is from */") \ - X(readi_group_size, uint32_t, , 1, uint, , "/* Size of readi transmit group */") - -#define X(name, type, size, __e, gltype, glsize, comment) type name size; -typedef struct { BEAMFORMER_UI_PARAMS } BeamformerUIParameters; -typedef struct { BEAMFORMER_PARAMS_HEAD } BeamformerParametersHead; -typedef struct { 
BEAMFORMER_PARAMS_TAIL } BeamformerParametersTail; - -/* NOTE: This struct follows the OpenGL std140 layout. DO NOT modify unless you have - * read and understood the rules, particulary with regards to _member alignment_ */ typedef struct { BEAMFORMER_PARAMS_HEAD BEAMFORMER_UI_PARAMS - BEAMFORMER_PARAMS_TAIL - float _pad[1]; } BeamformerParameters; #undef X -/* NOTE(rnp): keep this header importable for old C versions */ -#if __STDC_VERSION__ >= 201112L -_Static_assert((offsetof(BeamformerParameters, output_min_coordinate) & 15) == 0, - "BeamformerParameters.output_min_coordinate must lie on a 16 byte boundary"); -_Static_assert((sizeof(BeamformerParameters) & 15) == 0, "UBO size must be a multiple of 16"); -#endif - #define BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST \ X(ImagePlaneOffsets, 0) \ X(TransmitPower, 1) \ diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c @@ -1,5 +1,5 @@ /* See LICENSE for license details. */ -#define BEAMFORMER_SHARED_MEMORY_VERSION (13UL) +#define BEAMFORMER_SHARED_MEMORY_VERSION (14UL) typedef struct BeamformerFrame BeamformerFrame; typedef struct ShaderReloadContext ShaderReloadContext; @@ -126,7 +126,6 @@ typedef struct { struct { BeamformerParametersHead parameters_head; BeamformerUIParameters parameters_ui; - BeamformerParametersTail parameters_tail; }; }; @@ -235,6 +234,13 @@ beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block) return result; } +function b32 +beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block) +{ + b32 result = beamformer_parameter_block(sm, block)->dirty_regions != 0; + return result; +} + function BeamformerParameterBlock * beamformer_parameter_block_lock(SharedMemoryRegion *sm, u32 block, i32 timeout_ms) { diff --git a/build.c b/build.c @@ -860,6 +860,27 @@ build_matlab_bindings(Arena arena) result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerShaderStage.m")); #undef X + #define X(kind, ...) 
meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerTransmitMode_## kind); + meta_begin_matlab_class(&m, "OGLBeamformerTransmitModes", "int32"); + meta_begin_scope(&m, s8("enumeration")); + TRANSMIT_MODES_LIST + result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerTransmitModes.m")); + #undef X + + #define X(kind, ...) meta_push_matlab_enum_with_value(&m, s8(#kind), BeamformerReceiveMode_## kind); + meta_begin_matlab_class(&m, "OGLBeamformerReceiveModes", "int32"); + meta_begin_scope(&m, s8("enumeration")); + RECEIVE_MODES_LIST + result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerReceiveModes.m")); + #undef X + + #define X(kind, v, ...) meta_push_line(&m, s8(#kind " (" #v ")")); + meta_begin_matlab_class(&m, "OGLBeamformerSamplingModes", "int32"); + meta_begin_scope(&m, s8("enumeration")); + SAMPLING_MODES_LIST + result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerSamplingModes.m")); + #undef X + os_make_directory(OUTPUT("matlab/+OGLBeamformerFilter")); #define X(kind, ...) 
{OUTPUT("matlab/+OGLBeamformerFilter/" #kind ".m"), s8(#kind), s8(#__VA_ARGS__)}, read_only local_persist struct {char *out; s8 class, args;} filter_table[] = { @@ -905,7 +926,6 @@ build_matlab_bindings(Arena arena) meta_begin_scope(&m, s8("properties")); BEAMFORMER_PARAMS_HEAD BEAMFORMER_UI_PARAMS - BEAMFORMER_PARAMS_TAIL result &= meta_end_and_write_matlab(&m, OUTPUT("matlab/OGLBeamformerParameters.m")); meta_begin_matlab_class(&m, "OGLBeamformerParametersHead"); diff --git a/math.c b/math.c @@ -221,6 +221,16 @@ cross(v3 a, v3 b) } function v3 +v3_from_f32_array(f32 v[3]) +{ + v3 result; + result.E[0] = v[0]; + result.E[1] = v[1]; + result.E[2] = v[2]; + return result; +} + +function v3 v3_abs(v3 a) { v3 result; @@ -296,17 +306,6 @@ v3_normalize(v3 a) } function v4 -v4_from_f32_array(f32 v[4]) -{ - v4 result; - result.E[0] = v[0]; - result.E[1] = v[1]; - result.E[2] = v[2]; - result.E[3] = v[3]; - return result; -} - -function v4 v4_scale(v4 a, f32 scale) { v4 result; diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -14,17 +14,11 @@ layout(rg32f, binding = 2) readonly restrict uniform image1D focal_vectors; #define C_SPLINE 0.5 -#define TX_ROWS 0 -#define TX_COLS 1 - -#define TX_MODE_TX_COLS(a) (((a) & 2) != 0) -#define TX_MODE_RX_COLS(a) (((a) & 1) != 0) - vec2 rotate_iq(vec2 iq, float time) { vec2 result = iq; - if (center_frequency > 0) { - float arg = radians(360) * center_frequency * time; + if (demodulation_frequency > 0) { + float arg = radians(360) * demodulation_frequency * time; mat2 phasor = mat2( cos(arg), sin(arg), -sin(arg), cos(arg)); result = phasor * iq; @@ -35,7 +29,7 @@ vec2 rotate_iq(vec2 iq, float time) /* NOTE: See: https://cubic.org/docs/hermite.htm */ vec2 cubic(int base_index, float index) { - mat4 h = mat4( + const mat4 h = mat4( 2, -3, 0, 1, -2, 3, 0, 0, 1, -2, 1, 0, @@ -63,8 +57,9 @@ vec2 cubic(int base_index, float index) vec2 sample_rf(int channel, int transmit, float index) { - vec2 result = vec2(index >= 0.0f) * vec2((int(index) 
+ 1 + int(interpolate)) < dec_data_dim.x); - int base_index = channel * dec_data_dim.x * dec_data_dim.z + transmit * dec_data_dim.x; + bool interpolate = bool(shader_flags & ShaderFlags_Interpolate); + vec2 result = vec2(index >= 0.0f) * vec2((int(index) + 1 + int(interpolate)) < sample_count); + int base_index = int(channel * sample_count * acquisition_count + transmit * sample_count); if (interpolate) result *= cubic(base_index, index); else result *= rf_data[base_index + int(round(index))]; result = rotate_iq(result, index / sampling_frequency); @@ -110,8 +105,8 @@ float cylindrical_wave_transmit_distance(vec3 point, float focal_depth, float tr #if DAS_FAST vec3 RCA(vec3 world_point) { - bool tx_rows = !TX_MODE_TX_COLS(transmit_mode); - bool rx_rows = !TX_MODE_RX_COLS(transmit_mode); + bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0); + bool rx_rows = bool((shader_flags & ShaderFlags_RxColumns) == 0); vec2 xdc_world_point = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows); vec2 focal_vector = imageLoad(focal_vectors, u_channel).xy; float transmit_angle = radians(focal_vector.x); @@ -126,7 +121,7 @@ vec3 RCA(vec3 world_point) } vec2 result = vec2(0); - for (int channel = 0; channel < dec_data_dim.y; channel++) { + for (int channel = 0; channel < channel_count; channel++) { vec2 receive_vector = xdc_world_point - rca_plane_projection(vec3(channel * xdc_element_pitch, 0), rx_rows); float receive_distance = length(receive_vector); float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.y) * receive_vector.x); @@ -141,12 +136,12 @@ vec3 RCA(vec3 world_point) #else vec3 RCA(vec3 world_point) { - bool tx_rows = !TX_MODE_TX_COLS(transmit_mode); - bool rx_rows = !TX_MODE_RX_COLS(transmit_mode); + bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0); + bool rx_rows = bool((shader_flags & ShaderFlags_RxColumns) == 0); vec2 xdc_world_point = rca_plane_projection((xdc_transform * vec4(world_point, 
1)).xyz, rx_rows); vec3 sum = vec3(0); - for (int transmit = 0; transmit < dec_data_dim.z; transmit++) { + for (int transmit = 0; transmit < acquisition_count; transmit++) { vec2 focal_vector = imageLoad(focal_vectors, transmit).xy; float transmit_angle = radians(focal_vector.x); float focal_depth = focal_vector.y; @@ -159,7 +154,7 @@ vec3 RCA(vec3 world_point) transmit_angle, tx_rows); } - for (int rx_channel = 0; rx_channel < dec_data_dim.y; rx_channel++) { + for (int rx_channel = 0; rx_channel < channel_count; rx_channel++) { vec3 rx_center = vec3(rx_channel * xdc_element_pitch, 0); vec2 receive_vector = xdc_world_point - rca_plane_projection(rx_center, rx_rows); float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.y) * receive_vector.x); @@ -178,10 +173,10 @@ vec3 RCA(vec3 world_point) #if DAS_FAST vec3 HERCULES(vec3 world_point) { - bool uhercules = das_shader_id == DAS_ID_UHERCULES; + bool uhercules = shader_kind == ShaderKind_UHERCULES; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; - bool tx_rows = !TX_MODE_TX_COLS(transmit_mode); - bool rx_cols = TX_MODE_RX_COLS(transmit_mode); + bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0); + bool rx_cols = bool((shader_flags & ShaderFlags_RxColumns)); vec2 focal_vector = imageLoad(focal_vectors, 0).xy; float transmit_angle = radians(focal_vector.x); float focal_depth = focal_vector.y; @@ -195,7 +190,7 @@ vec3 HERCULES(vec3 world_point) } vec2 result = vec2(0); - for (int transmit = int(uhercules); transmit < dec_data_dim.z; transmit++) { + for (int transmit = int(uhercules); transmit < acquisition_count; transmit++) { int tx_channel = uhercules ? 
imageLoad(sparse_elements, transmit - int(uhercules)).x : transmit; vec3 element_position; if (rx_cols) element_position = vec3(u_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0); @@ -205,7 +200,7 @@ vec3 HERCULES(vec3 world_point) distance(xdc_world_point.xy, element_position.xy)); if (apodization > 0) { /* NOTE: tribal knowledge */ - if (transmit == 0) apodization *= inversesqrt(dec_data_dim.z); + if (transmit == 0) apodization *= inversesqrt(acquisition_count); float sidx = sample_index(transmit_distance + distance(xdc_world_point, element_position)); result += apodization * sample_rf(u_channel, transmit, sidx); @@ -216,10 +211,10 @@ vec3 HERCULES(vec3 world_point) #else vec3 HERCULES(vec3 world_point) { - bool uhercules = das_shader_id == DAS_ID_UHERCULES; + bool uhercules = shader_kind == ShaderKind_UHERCULES; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; - bool tx_rows = !TX_MODE_TX_COLS(transmit_mode); - bool rx_cols = TX_MODE_RX_COLS(transmit_mode); + bool tx_rows = bool((shader_flags & ShaderFlags_TxColumns) == 0); + bool rx_cols = bool((shader_flags & ShaderFlags_RxColumns)); vec2 focal_vector = imageLoad(focal_vectors, 0).xy; float transmit_angle = radians(focal_vector.x); float focal_depth = focal_vector.y; @@ -233,9 +228,9 @@ vec3 HERCULES(vec3 world_point) } vec3 result = vec3(0); - for (int transmit = int(uhercules); transmit < dec_data_dim.z; transmit++) { + for (int transmit = int(uhercules); transmit < acquisition_count; transmit++) { int tx_channel = uhercules ? 
imageLoad(sparse_elements, transmit - int(uhercules)).x : transmit; - for (int rx_channel = 0; rx_channel < dec_data_dim.y; rx_channel++) { + for (int rx_channel = 0; rx_channel < channel_count; rx_channel++) { vec3 element_position; if (rx_cols) element_position = vec3(rx_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0); else element_position = vec3(tx_channel, rx_channel, 0) * vec3(xdc_element_pitch, 0); @@ -244,7 +239,7 @@ vec3 HERCULES(vec3 world_point) distance(xdc_world_point.xy, element_position.xy)); if (apodization > 0) { /* NOTE: tribal knowledge */ - if (transmit == 0) apodization *= inversesqrt(dec_data_dim.z); + if (transmit == 0) apodization *= inversesqrt(acquisition_count); float sidx = sample_index(transmit_distance + distance(xdc_world_point, element_position)); vec2 value = apodization * sample_rf(rx_channel, transmit, sidx); @@ -259,7 +254,7 @@ vec3 HERCULES(vec3 world_point) #if DAS_FAST vec3 FORCES(vec3 world_point) { - bool uforces = das_shader_id == DAS_ID_UFORCES; + bool uforces = shader_kind == ShaderKind_UFORCES; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; float receive_distance = distance(xdc_world_point.xz, vec2(u_channel * xdc_element_pitch.x, 0)); float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) * @@ -267,9 +262,9 @@ vec3 FORCES(vec3 world_point) vec2 result = vec2(0); if (apodization > 0) { - for (int transmit = int(uforces); transmit < dec_data_dim.z; transmit++) { + for (int transmit = int(uforces); transmit < acquisition_count; transmit++) { int tx_channel = uforces ? 
imageLoad(sparse_elements, transmit - int(uforces)).x : transmit; - vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(dec_data_dim.y / 2)), 0); + vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(channel_count / 2)), 0); float sidx = sample_index(distance(xdc_world_point, transmit_center) + receive_distance); result += apodization * sample_rf(u_channel, transmit, sidx); @@ -280,18 +275,18 @@ vec3 FORCES(vec3 world_point) #else vec3 FORCES(vec3 world_point) { - bool uforces = das_shader_id == DAS_ID_UFORCES; + bool uforces = shader_kind == ShaderKind_UFORCES; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; vec3 result = vec3(0); - for (int rx_channel = 0; rx_channel < dec_data_dim.y; rx_channel++) { + for (int rx_channel = 0; rx_channel < channel_count; rx_channel++) { float receive_distance = distance(xdc_world_point.xz, vec2(rx_channel * xdc_element_pitch.x, 0)); float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) * (xdc_world_point.x - rx_channel * xdc_element_pitch.x)); if (apodization > 0) { - for (int transmit = int(uforces); transmit < dec_data_dim.z; transmit++) { + for (int transmit = int(uforces); transmit < acquisition_count; transmit++) { int tx_channel = uforces ? 
imageLoad(sparse_elements, transmit - int(uforces)).x : transmit; - vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(dec_data_dim.y / 2)), 0); + vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(channel_count / 2)), 0); float sidx = sample_index(distance(xdc_world_point, transmit_center) + receive_distance); vec2 value = apodization * sample_rf(rx_channel, tx_channel, sidx); @@ -313,29 +308,30 @@ void main() out_voxel += u_voxel_offset; #endif - vec3 world_point = (u_voxel_transform * vec4(out_voxel, 1)).xyz; + vec3 world_point = (voxel_transform * vec4(out_voxel, 1)).xyz; - switch (das_shader_id) { - case DAS_ID_FORCES: - case DAS_ID_UFORCES: + switch (shader_kind) { + case ShaderKind_FORCES: + case ShaderKind_UFORCES: { sum += FORCES(world_point); }break; - case DAS_ID_HERCULES: - case DAS_ID_UHERCULES: + case ShaderKind_HERCULES: + case ShaderKind_UHERCULES: { sum += HERCULES(world_point); }break; - case DAS_ID_FLASH: - case DAS_ID_RCA_TPW: - case DAS_ID_RCA_VLS: + case ShaderKind_Flash: + case ShaderKind_RCA_TPW: + case ShaderKind_RCA_VLS: { sum += RCA(world_point); }break; } /* TODO(rnp): scale such that brightness remains ~constant */ - if (coherency_weighting) sum.xy *= sum.xy / (sum.z + float(sum.z == 0)); + if (bool(shader_flags & ShaderFlags_CoherencyWeighting)) + sum.xy *= sum.xy / (sum.z + float(sum.z == 0)); imageStore(u_out_data_tex, out_voxel, vec4(sum.xy, 0, 0)); } diff --git a/shaders/filter.glsl b/shaders/filter.glsl @@ -36,14 +36,32 @@ vec2 complex_mul(vec2 a, vec2 b) vec2 rotate_iq(vec2 iq, int index) { - float arg = radians(360) * demodulation_frequency * index / sampling_frequency; - /* TODO(rnp): this can be optimized based on the sampling mode. for 4x sampling - * (NS200BW) coefficients cycle through (cos) {1, 0, -1, 0} (sin) {0, -1, 0, 1} - * so we don't actually need to use the special function unit. 
There should be an - * equivalent for BS100BW and BS50BW as well */ - mat2 phasor = mat2(cos(arg), -sin(arg), - sin(arg), cos(arg)); - vec2 result = phasor * iq; + vec2 result; + /* TODO(rnp): this doesn't give us the same performance boost as hardcoding the mode */ + switch (shader_flags & ShaderFlags_SamplingModeMask) { + case SamplingMode_NS200BW:{ + // fs = 2 * fd + // arg = PI * index + // cos -> 1 -1 1 -1 + // sin -> 0 0 0 0 + /* NOTE(rnp): faster than taking iq or -iq, good job shader compiler */ + if (bool(index & 1)) result = mat2(-1, 0, 0, -1) * iq; + else result = mat2( 1, 0, 0, 1) * iq; + }break; + case SamplingMode_BS100BW:{ + // fs = fd + // arg = 2 * PI * index + // cos -> 1 1 1 1 + // sin -> 0 0 0 0 + result = iq; + }break; + default:{ + float arg = radians(360) * demodulation_frequency * index / sampling_frequency; + mat2 phasor = mat2(cos(arg), -sin(arg), + sin(arg), cos(arg)); + result = phasor * iq; + }break; + } return result; } @@ -60,6 +78,7 @@ void main() uint channel = gl_GlobalInvocationID.y; uint transmit = gl_GlobalInvocationID.z; + bool map_channels = bool(shader_flags & ShaderFlags_MapChannels); uint in_channel = map_channels ? 
imageLoad(channel_mapping, int(channel)).x : channel; uint in_offset = input_channel_stride * in_channel + input_transmit_stride * transmit; uint out_offset = output_channel_stride * channel + diff --git a/static.c b/static.c @@ -411,16 +411,12 @@ setup_beamformer(Arena *memory, BeamformerCtx **o_ctx, BeamformerInput **o_input #endif read_only local_persist s8 compute_headers[BeamformerShaderKind_ComputeCount] = { - #define X(name, type, size, __e, gltype, glsize, comment) "\t" #gltype " " #name #glsize "; " comment "\n" + /* X(name, type, gltype) */ + #define X(name, t, gltype) "\t" #gltype " " #name ";\n" [BeamformerShaderKind_DAS] = s8_comp("layout(std140, binding = 0) uniform parameters {\n" - BEAMFORMER_PARAMS_HEAD - BEAMFORMER_UI_PARAMS - BEAMFORMER_PARAMS_TAIL + BEAMFORMER_DAS_UBO_PARAM_LIST "};\n\n" ), - #undef X - /* X(name, type, gltype) */ - #define X(name, t, gltype) "\t" #gltype " " #name ";\n" [BeamformerShaderKind_Decode] = s8_comp("layout(std140, binding = 0) uniform parameters {\n" BEAMFORMER_DECODE_UBO_PARAM_LIST "};\n\n" diff --git a/tests/throughput.c b/tests/throughput.c @@ -196,20 +196,23 @@ read_zemp_bp_v1(u8 *path) } function void -fill_beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out) +beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out) { - mem_copy(out->xdc_transform, zbp->xdc_transform, sizeof(out->xdc_transform)); - mem_copy(out->dec_data_dim, zbp->decoded_data_dim, sizeof(out->dec_data_dim)); - mem_copy(out->xdc_element_pitch, zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch)); - mem_copy(out->rf_raw_dim, zbp->raw_data_dim, sizeof(out->rf_raw_dim)); - - out->transmit_mode = zbp->transmit_mode; - out->decode = zbp->decode_mode; - out->das_shader_id = zbp->beamform_mode; - out->time_offset = zbp->time_offset; - out->sampling_frequency = zbp->sampling_frequency; - out->center_frequency = zbp->center_frequency; - out->speed_of_sound = zbp->speed_of_sound; + 
mem_copy(out->xdc_transform, zbp->xdc_transform, sizeof(out->xdc_transform)); + mem_copy(out->xdc_element_pitch, zbp->xdc_element_pitch, sizeof(out->xdc_element_pitch)); + mem_copy(out->raw_data_dimensions, zbp->raw_data_dim, sizeof(out->raw_data_dimensions)); + + out->sample_count = zbp->decoded_data_dim[0]; + out->channel_count = zbp->decoded_data_dim[1]; + out->acquisition_count = zbp->decoded_data_dim[2]; + out->transmit_mode = (u8)((zbp->transmit_mode & 2) >> 1); + out->receive_mode = (u8)((zbp->transmit_mode & 1) >> 0); + out->decode = (u8)zbp->decode_mode; + out->das_shader_id = zbp->beamform_mode; + out->time_offset = zbp->time_offset; + out->sampling_frequency = zbp->sampling_frequency; + out->demodulation_frequency = zbp->center_frequency; + out->speed_of_sound = zbp->speed_of_sound; } #define shift_n(v, c, n) v += n, c -= n @@ -290,7 +293,7 @@ decompress_data_at_work_index(Stream *path_base, u32 index) function b32 send_frame(i16 *restrict i16_data, BeamformerParameters *restrict bp) { - u32 data_size = bp->rf_raw_dim[0] * bp->rf_raw_dim[1] * sizeof(i16); + u32 data_size = bp->raw_data_dimensions[0] * bp->raw_data_dimensions[1] * sizeof(i16); b32 result = beamformer_push_data_with_compute(i16_data, data_size, BeamformerViewPlaneTag_XZ, 0); if (!result && !g_should_exit) printf("lib error: %s\n", beamformer_get_last_error_string()); @@ -314,7 +317,7 @@ execute_study(s8 study, Arena arena, Stream path, Options *options) if (!zbp) die("failed to unpack parameters file\n"); BeamformerParameters bp = {0}; - fill_beamformer_parameters_from_zemp_bp_v1(zbp, &bp); + beamformer_parameters_from_zemp_bp_v1(zbp, &bp); mem_copy(bp.output_points, g_output_points, sizeof(bp.output_points)); bp.output_points[3] = 1; @@ -322,16 +325,28 @@ execute_study(s8 study, Arena arena, Stream path, Options *options) bp.output_min_coordinate[0] = g_lateral_extent.x; bp.output_min_coordinate[1] = 0; bp.output_min_coordinate[2] = g_axial_extent.x; - bp.output_min_coordinate[3] = 0; 
bp.output_max_coordinate[0] = g_lateral_extent.y; bp.output_max_coordinate[1] = 0; bp.output_max_coordinate[2] = g_axial_extent.y; - bp.output_max_coordinate[3] = 0; bp.f_number = g_f_number; bp.beamform_plane = 0; - bp.interpolate = 0; + bp.interpolate = 1; + + bp.decimation_rate = 1; + bp.demodulation_frequency = bp.sampling_frequency / 4; + + BeamformerFilterParameters kaiser = {0}; + kaiser.Kaiser.beta = 5.65f; + kaiser.Kaiser.cutoff_frequency = 2.0e6f; + kaiser.Kaiser.length = 36; + + f32 kaiser_parameters[sizeof(kaiser.Kaiser) / sizeof(f32)]; + mem_copy(kaiser_parameters, &kaiser.Kaiser, sizeof(kaiser.Kaiser)); + beamformer_create_filter(BeamformerFilterKind_Kaiser, kaiser_parameters, + countof(kaiser_parameters), bp.sampling_frequency / 2, 0, 0, 0); + beamformer_set_pipeline_stage_parameters(0, 0); if (zbp->sparse_elements[0] == -1) { for (i16 i = 0; i < countof(zbp->sparse_elements); i++) @@ -349,10 +364,9 @@ execute_study(s8 study, Arena arena, Stream path, Options *options) beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements)); beamformer_push_parameters(&bp); - free(zbp); - i32 shader_stages[16]; u32 shader_stage_count = 0; + shader_stages[shader_stage_count++] = BeamformerShaderKind_Demodulate; if (options->cuda) shader_stages[shader_stage_count++] = BeamformerShaderKind_CudaDecode; else shader_stages[shader_stage_count++] = BeamformerShaderKind_Decode; shader_stages[shader_stage_count++] = BeamformerShaderKind_DAS; @@ -370,7 +384,7 @@ execute_study(s8 study, Arena arena, Stream path, Options *options) u32 frame = 0; f32 times[32] = {0}; - f32 data_size = (f32)(bp.rf_raw_dim[0] * bp.rf_raw_dim[1] * sizeof(*data)); + f32 data_size = (f32)(bp.raw_data_dimensions[0] * bp.raw_data_dimensions[1] * sizeof(*data)); f64 start = os_get_time(); for (;!g_should_exit;) { if (send_frame(data, &bp)) { @@ -397,9 +411,11 @@ execute_study(s8 study, Arena arena, Stream path, Options *options) lip.active = 0; 
beamformer_set_live_parameters(&lip); } else { - send_frame(data, &bp); + for (u32 i = 0; i < zbp->raw_data_dim[2]; i++) + send_frame(data + i * bp.raw_data_dimensions[0] * bp.raw_data_dimensions[1], &bp); } + free(zbp); free(data); } diff --git a/ui.c b/ui.c @@ -659,11 +659,14 @@ stream_append_variable_group(Stream *s, Variable *var) function s8 push_das_shader_kind(Stream *s, DASShaderKind shader, u32 transmit_count) { - #define X(type, id, pretty, fixed_tx) s8_comp(pretty), - read_only local_persist s8 pretty_names[DASShaderKind_Count + 1] = {DAS_TYPES s8_comp("Invalid")}; + #define X(__1, __2, pretty, ...) s8_comp(pretty), + read_only local_persist s8 pretty_names[DASShaderKind_Count + 1] = { + DAS_SHADER_KIND_LIST + s8_comp("Invalid") + }; #undef X - #define X(type, id, pretty, fixed_tx) fixed_tx, - read_only local_persist u8 fixed_transmits[DASShaderKind_Count + 1] = {DAS_TYPES 0}; + #define X(__1, __2, __3, fixed_tx) fixed_tx, + read_only local_persist u8 fixed_transmits[DASShaderKind_Count + 1] = {DAS_SHADER_KIND_LIST}; #undef X stream_append_s8(s, pretty_names[MIN(shader, DASShaderKind_Count)]); @@ -1171,8 +1174,8 @@ add_beamformer_parameters_view(Variable *parent, BeamformerCtx *ctx) add_beamformer_variable(ui, group, &ui->arena, s8("Sampling Frequency:"), s8("[MHz]"), &bp->sampling_frequency, (v2){0}, 1e-6f, 0, 0, ui->font); - add_beamformer_variable(ui, group, &ui->arena, s8("Center Frequency:"), s8("[MHz]"), - &bp->center_frequency, (v2){.y = 100e6f}, 1e-6f, 1e5f, + add_beamformer_variable(ui, group, &ui->arena, s8("Demodulation Frequency:"), s8("[MHz]"), + &bp->demodulation_frequency, (v2){.y = 100e6f}, 1e-6f, 1e5f, V_INPUT|V_TEXT|V_CAUSES_COMPUTE, ui->font); add_beamformer_variable(ui, group, &ui->arena, s8("Speed of Sound:"), s8("[m/s]"), @@ -1248,8 +1251,8 @@ ui_beamformer_frame_view_convert(BeamformerUI *ui, Arena *arena, Variable *view, bv->threshold.real32 = old? old->threshold.real32 : 55.0f; bv->gamma.scaled_real32.val = old? 
old->gamma.scaled_real32.val : 1.0f; bv->gamma.scaled_real32.scale = old? old->gamma.scaled_real32.scale : 0.05f; - bv->min_coordinate = (old && old->frame)? old->frame->min_coordinate.xyz : (v3){0}; - bv->max_coordinate = (old && old->frame)? old->frame->max_coordinate.xyz : (v3){0}; + bv->min_coordinate = (old && old->frame)? old->frame->min_coordinate : (v3){0}; + bv->max_coordinate = (old && old->frame)? old->frame->max_coordinate : (v3){0}; #define X(_t, pretty) s8_comp(pretty), read_only local_persist s8 kind_labels[] = {BEAMFORMER_FRAME_VIEW_KIND_LIST}; @@ -1501,8 +1504,8 @@ beamformer_frame_view_plane_size(BeamformerUI *ui, BeamformerFrameView *view) { v3 result; if (view->kind == BeamformerFrameViewKind_3DXPlane) { - v3 min = v4_from_f32_array(ui->params.output_min_coordinate).xyz; - v3 max = v4_from_f32_array(ui->params.output_max_coordinate).xyz; + v3 min = v3_from_f32_array(ui->params.output_min_coordinate); + v3 max = v3_from_f32_array(ui->params.output_max_coordinate); result = v3_sub(max, min); swap(result.y, result.z); result.x = MAX(1e-3f, result.x); @@ -1711,8 +1714,8 @@ view_update(BeamformerUI *ui, BeamformerFrameView *view) view->dirty |= view->frame != ui->latest_plane[index]; view->frame = ui->latest_plane[index]; if (view->dirty) { - view->min_coordinate = v4_from_f32_array(ui->params.output_min_coordinate).xyz; - view->max_coordinate = v4_from_f32_array(ui->params.output_max_coordinate).xyz; + view->min_coordinate = v3_from_f32_array(ui->params.output_min_coordinate); + view->max_coordinate = v3_from_f32_array(ui->params.output_max_coordinate); } }