ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: f815e8bb2229b323096765e1d00be10d0d0c57e2
Parent: 54effe8d17a212b7594c4c6a6d942286e2ea41e3
Author: Randy Palamar
Date:   Tue, 23 Sep 2025 19:35:57 -0600

shader/das: first pass at baking exact (integer) parameters

This gives performance boosts as high as 10% for FORCES. While I'm
typically against const, it seems that including it on function
parameters ensures that actual compile-time constants carry
through correctly without additional register usage.

Diffstat:
Mbeamformer.c | 109++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mbeamformer.h | 14++++++++------
Mshaders/das.glsl | 72++++++++++++++++++++++++++++++++----------------------------------------
3 files changed, 101 insertions(+), 94 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -1,13 +1,10 @@ /* See LICENSE for license details. */ /* TODO(rnp): - * [ ]: do JIT compilation of shaders - * - a larger subset of parameters can be made into compile time constants - * - preallocated storage for shaders is minimized - * - loops over TX and RX count can be unrolled - * - hot reload can still be trivially supported: - * - loop over shaders for the current pipeline - * - check if the base shader matches the shader we are trying to reload - * - load header and append constants which are stored in the pipeline parameters + * [ ]: refactor: fancier hot reloading for JIT shaders + * - loop over all active blocks + - loop over shader sets per block + * - when match found reload it + * [ ]: refactor: move shader bake parameters into meta code * [ ]: measure performance of doing channel mapping in a separate shader * [ ]: BeamformWorkQueue -> BeamformerWorkQueue * [ ]: need to keep track of gpu memory in some way @@ -445,7 +442,7 @@ das_voxel_transform_matrix(BeamformerParameters *bp) } function void -das_ubo_from_beamformer_parameters(BeamformerDASUBO *du, BeamformerParameters *bp) +das_ubo_from_beamformer_parameters(BeamformerComputePlan *cp, BeamformerDASUBO *du, BeamformerParameters *bp) { du->voxel_transform = das_voxel_transform_matrix(bp); mem_copy(du->xdc_transform.E, bp->xdc_transform, sizeof(du->xdc_transform)); @@ -455,13 +452,14 @@ das_ubo_from_beamformer_parameters(BeamformerDASUBO *du, BeamformerParameters *b du->speed_of_sound = bp->speed_of_sound; du->time_offset = bp->time_offset; du->f_number = bp->f_number; - du->shader_kind = bp->das_shader_id; - du->sample_count = bp->sample_count; - du->channel_count = bp->channel_count; - du->acquisition_count = bp->acquisition_count; - du->shader_flags = 0; - if (bp->coherency_weighting) du->shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting; + cp->das_shader_kind = bp->das_shader_id; + cp->das_sample_count = bp->sample_count; + 
cp->das_channel_count = bp->channel_count; + cp->das_acquisition_count = bp->acquisition_count; + + cp->das_shader_flags = 0; + if (bp->coherency_weighting) cp->das_shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting; } function void @@ -469,7 +467,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) { BeamformerDASUBO *bp = &cp->das_ubo_data; - das_ubo_from_beamformer_parameters(bp, &pb->parameters); + das_ubo_from_beamformer_parameters(cp, bp, &pb->parameters); b32 decode_first = pb->pipeline.shaders[0] == BeamformerShaderKind_Decode; b32 run_cuda_hilbert = 0; @@ -546,9 +544,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) das_data_kind = BeamformerDataKind_Float32Complex; i32 local_flags = 0; - if ((bp->shader_flags & BeamformerShaderDASFlags_CoherencyWeighting) == 0) + if ((cp->das_shader_flags & BeamformerShaderDASFlags_CoherencyWeighting) == 0) local_flags |= BeamformerShaderDASFlags_Fast; - if (bp->shader_kind == BeamformerDASKind_UFORCES || bp->shader_kind == BeamformerDASKind_UHERCULES) + if (cp->das_shader_kind == BeamformerDASKind_UFORCES || cp->das_shader_kind == BeamformerDASKind_UHERCULES) local_flags |= BeamformerShaderDASFlags_Sparse; if (pb->parameters.interpolate) local_flags |= BeamformerShaderDASFlags_Interpolate; @@ -572,8 +570,8 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) cp->pipeline.data_kind = data_kind; u32 das_sample_stride = 1; - u32 das_transmit_stride = bp->sample_count; - u32 das_channel_stride = bp->acquisition_count * bp->sample_count; + u32 das_transmit_stride = cp->das_sample_count; + u32 das_channel_stride = cp->das_acquisition_count * cp->das_sample_count; u32 decimation_rate = MAX(pb->parameters.decimation_rate, 1); if (demodulate) { @@ -582,14 +580,14 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) } u32 input_sample_stride = 1; - u32 input_transmit_stride = bp->sample_count; + u32 
input_transmit_stride = cp->das_sample_count; u32 input_channel_stride = pb->parameters.raw_data_dimensions[0]; BeamformerDecodeUBO *dp = &cp->decode_ubo_data; dp->decode_mode = pb->parameters.decode; - dp->transmit_count = bp->acquisition_count; + dp->transmit_count = cp->das_acquisition_count; - dp->input_sample_stride = decode_first? input_sample_stride : bp->acquisition_count; + dp->input_sample_stride = decode_first? input_sample_stride : cp->das_acquisition_count; dp->input_channel_stride = decode_first? input_channel_stride : das_channel_stride; dp->input_transmit_stride = decode_first? input_transmit_stride : 1; dp->output_sample_stride = das_sample_stride; @@ -600,9 +598,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) dp->output_transmit_stride *= decimation_rate; } - cp->decode_dispatch.x = (u32)ceil_f32((f32)bp->sample_count / DECODE_LOCAL_SIZE_X); - cp->decode_dispatch.y = (u32)ceil_f32((f32)bp->channel_count / DECODE_LOCAL_SIZE_Y); - cp->decode_dispatch.z = (u32)ceil_f32((f32)bp->acquisition_count / DECODE_LOCAL_SIZE_Z); + cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_sample_count / DECODE_LOCAL_SIZE_X); + cp->decode_dispatch.y = (u32)ceil_f32((f32)cp->das_channel_count / DECODE_LOCAL_SIZE_Y); + cp->decode_dispatch.z = (u32)ceil_f32((f32)cp->das_acquisition_count / DECODE_LOCAL_SIZE_Z); /* NOTE(rnp): decode 2 samples per dispatch when data is i16 */ if (decode_first && data_kind == BeamformerDataKind_Int16) @@ -624,7 +622,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) mp->decimation_rate = decimation_rate; bp->sampling_frequency /= 2 * (f32)mp->decimation_rate; - bp->sample_count /= 2 * mp->decimation_rate; + cp->das_sample_count /= 2 * mp->decimation_rate; if (decode_first) { mp->input_channel_stride = dp->output_channel_stride; @@ -644,16 +642,16 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) mp->output_sample_stride = 
dp->input_sample_stride; mp->output_transmit_stride = dp->input_transmit_stride; - cp->decode_dispatch.x = (u32)ceil_f32((f32)bp->sample_count / DECODE_LOCAL_SIZE_X); + cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->das_sample_count / DECODE_LOCAL_SIZE_X); } } /* TODO(rnp): filter may need a different dispatch layout */ - cp->demod_dispatch.x = (u32)ceil_f32((f32)bp->sample_count / FILTER_LOCAL_SIZE_X); - cp->demod_dispatch.y = (u32)ceil_f32((f32)bp->channel_count / FILTER_LOCAL_SIZE_Y); - cp->demod_dispatch.z = (u32)ceil_f32((f32)bp->acquisition_count / FILTER_LOCAL_SIZE_Z); + cp->demod_dispatch.x = (u32)ceil_f32((f32)cp->das_sample_count / FILTER_LOCAL_SIZE_X); + cp->demod_dispatch.y = (u32)ceil_f32((f32)cp->das_channel_count / FILTER_LOCAL_SIZE_Y); + cp->demod_dispatch.z = (u32)ceil_f32((f32)cp->das_acquisition_count / FILTER_LOCAL_SIZE_Z); - cp->rf_size = bp->sample_count * bp->channel_count * bp->acquisition_count; + cp->rf_size = cp->das_sample_count * cp->das_channel_count * cp->das_acquisition_count; if (demodulate || run_cuda_hilbert) cp->rf_size *= 8; else cp->rf_size *= 4; @@ -662,12 +660,12 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) flt->demodulation_frequency = bp->demodulation_frequency; flt->sampling_frequency = bp->sampling_frequency; flt->decimation_rate = 1; - flt->output_channel_stride = bp->sample_count * bp->acquisition_count; + flt->output_channel_stride = cp->das_sample_count * cp->das_acquisition_count; flt->output_sample_stride = 1; - flt->output_transmit_stride = bp->sample_count; - flt->input_channel_stride = bp->sample_count * bp->acquisition_count; + flt->output_transmit_stride = cp->das_sample_count; + flt->input_channel_stride = cp->das_sample_count * cp->das_acquisition_count; flt->input_sample_stride = 1; - flt->input_transmit_stride = bp->sample_count; + flt->input_transmit_stride = cp->das_sample_count; } function void @@ -741,7 +739,6 @@ load_compute_shader(BeamformerCtx *ctx, 
BeamformerComputePlan *cp, u32 shader_sl BeamformerShaderKind shader = cp->pipeline.shaders[shader_slot]; BeamformerShaderDescriptor *sd = beamformer_shader_descriptors + shader; - u32 program = 0; i32 reloadable_index = beamformer_shader_reloadable_index_by_shader[shader]; if (reloadable_index != -1) { @@ -767,10 +764,28 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl if (sd->has_local_flags) { stream_append_s8(&shader_stream, s8("#define ShaderFlags (0x")); - stream_append_hex_u64(&shader_stream, (u64)match_vector[sd->match_vector_length]); + i32 flags = match_vector[sd->match_vector_length]; + if (shader == BeamformerShaderKind_DAS) flags |= cp->das_shader_flags; + stream_append_hex_u64(&shader_stream, (u64)flags); stream_append_s8(&shader_stream, s8(")\n")); } + /* TODO(rnp): generate this */ + switch (base_shader) { + case BeamformerShaderKind_DAS:{ + stream_append_s8(&shader_stream, s8("#define ShaderKind ")); + stream_append_u64(&shader_stream, cp->das_shader_kind); + stream_append_s8(&shader_stream, s8("\n#define SampleCount ")); + stream_append_u64(&shader_stream, cp->das_sample_count); + stream_append_s8(&shader_stream, s8("\n#define ChannelCount ")); + stream_append_u64(&shader_stream, cp->das_channel_count); + stream_append_s8(&shader_stream, s8("\n#define AcquisitionCount ")); + stream_append_u64(&shader_stream, cp->das_acquisition_count); + stream_append_s8(&shader_stream, s8("\n")); + }break; + default:{}break; + } + stream_append_s8(&shader_stream, s8("\n#line 1\n")); s8 shader_text = arena_stream_commit(&arena, &shader_stream); @@ -820,8 +835,8 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, if (ctx->compute_context.ping_pong_ssbo_size < decoded_data_size) alloc_shader_storage(ctx, decoded_data_size, arena); - if (cp->hadamard_order != (i32)cp->das_ubo_data.acquisition_count) - update_hadamard_texture(cp, (i32)cp->das_ubo_data.acquisition_count, arena); + if (cp->hadamard_order 
!= (i32)cp->das_acquisition_count) + update_hadamard_texture(cp, (i32)cp->das_acquisition_count, arena); cp->min_coordinate = v3_from_f32_array(pb->parameters.output_min_coordinate); cp->max_coordinate = v3_from_f32_array(pb->parameters.output_max_coordinate); @@ -954,8 +969,6 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame case BeamformerShaderKind_DAS:{ local_persist u32 das_cycle_t = 0; - BeamformerDASUBO *ubo = &cp->das_ubo_data; - i32 local_flags = match_vector[shader_descriptor->match_vector_length]; b32 fast = (local_flags & BeamformerShaderDASFlags_Fast) != 0; b32 sparse = (local_flags & BeamformerShaderDASFlags_Sparse) != 0; @@ -981,14 +994,14 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame if (fast) { i32 loop_end; - if (ubo->shader_kind == BeamformerDASKind_RCA_VLS || - ubo->shader_kind == BeamformerDASKind_RCA_TPW) + if (cp->das_shader_kind == BeamformerDASKind_RCA_VLS || + cp->das_shader_kind == BeamformerDASKind_RCA_TPW) { /* NOTE(rnp): to avoid repeatedly sampling the whole focal vectors * texture we loop over transmits for VLS/TPW */ - loop_end = (i32)ubo->acquisition_count; + loop_end = (i32)cp->das_acquisition_count; } else { - loop_end = (i32)ubo->channel_count; + loop_end = (i32)cp->das_channel_count; } f32 percent_per_step = 1.0f / (f32)loop_end; cc->processing_progress = -percent_per_step; @@ -1204,8 +1217,8 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c frame->min_coordinate = cp->min_coordinate; frame->max_coordinate = cp->max_coordinate; - frame->das_kind = cp->das_ubo_data.shader_kind; - frame->compound_count = cp->das_ubo_data.acquisition_count; + frame->das_kind = cp->das_shader_kind; + frame->compound_count = cp->das_acquisition_count; BeamformerComputeContext *cc = &ctx->compute_context; BeamformerComputePipeline *pipeline = &cp->pipeline; diff --git a/beamformer.h b/beamformer.h @@ -139,12 +139,7 @@ typedef struct { 
X(demodulation_frequency, f32, float) \ X(speed_of_sound, f32, float) \ X(time_offset, f32, float) \ - X(f_number, f32, float) \ - X(shader_flags, u32, int) \ - X(shader_kind, u32, uint) \ - X(sample_count, u32, uint) \ - X(channel_count, u32, uint) \ - X(acquisition_count, u32, uint) + X(f_number, f32, float) typedef alignas(16) struct { #define X(name, type, ...) type name; @@ -165,6 +160,7 @@ typedef alignas(16) struct { #define X(name, type, ...) type name; BEAMFORMER_DAS_UBO_PARAM_LIST #undef X + float _pad[1]; } BeamformerDASUBO; static_assert((sizeof(BeamformerDASUBO) & 15) == 0, "UBO size must be a multiple of 16"); @@ -228,6 +224,12 @@ struct BeamformerComputePlan { BEAMFORMER_COMPUTE_UBO_LIST #undef X + u32 das_shader_kind; + u32 das_sample_count; + u32 das_channel_count; + u32 das_acquisition_count; + i32 das_shader_flags; + BeamformerComputePlan *next; }; diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -35,9 +35,10 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 { #define RESULT_STORE(a, length_a) RESULT_TYPE(a, length_a) #endif -const bool fast = bool(ShaderFlags & ShaderFlags_Fast); -const bool sparse = bool(ShaderFlags & ShaderFlags_Sparse); -const bool interpolate = bool(ShaderFlags & ShaderFlags_Interpolate); +const bool fast = bool(ShaderFlags & ShaderFlags_Fast); +const bool sparse = bool(ShaderFlags & ShaderFlags_Sparse); +const bool interpolate = bool(ShaderFlags & ShaderFlags_Interpolate); +const bool coherency_weighting = bool(ShaderFlags & ShaderFlags_CoherencyWeighting); #if (ShaderFlags & ShaderFlags_Fast) layout(TEXTURE_KIND, binding = 0) restrict uniform image3D u_out_data_tex; @@ -55,7 +56,7 @@ layout(r8i, binding = 3) readonly restrict uniform iimage1D transmit_receive_ #define C_SPLINE 0.5 #if DataKind == DataKind_Float32Complex -vec2 rotate_iq(vec2 iq, float time) +vec2 rotate_iq(const vec2 iq, const float time) { float arg = radians(360) * demodulation_frequency * time; mat2 phasor = mat2( cos(arg), sin(arg), @@ 
-68,7 +69,7 @@ vec2 rotate_iq(vec2 iq, float time) #endif /* NOTE: See: https://cubic.org/docs/hermite.htm */ -SAMPLE_TYPE cubic(int base_index, float index) +SAMPLE_TYPE cubic(const int base_index, const float index) { const mat4 h = mat4( 2, -3, 0, 1, @@ -101,23 +102,23 @@ SAMPLE_TYPE cubic(int base_index, float index) return result; } -SAMPLE_TYPE sample_rf(int channel, int transmit, float index) +SAMPLE_TYPE sample_rf(const int channel, const int transmit, const float index) { - SAMPLE_TYPE result = SAMPLE_TYPE(index >= 0.0f) * SAMPLE_TYPE((int(index) + 1 + int(interpolate)) < sample_count); - int base_index = int(channel * sample_count * acquisition_count + transmit * sample_count); + SAMPLE_TYPE result = SAMPLE_TYPE(index >= 0.0f) * SAMPLE_TYPE((int(index) + 1 + int(interpolate)) < SampleCount); + int base_index = int(channel * SampleCount * AcquisitionCount + transmit * SampleCount); if (interpolate) result *= cubic(base_index, index); else result *= rf_data[base_index + int(round(index))]; result = rotate_iq(result, index / sampling_frequency); return result; } -float sample_index(float distance) +float sample_index(const float distance) { float time = distance / speed_of_sound + time_offset; return time * sampling_frequency; } -float apodize(float arg) +float apodize(const float arg) { /* NOTE: used for constant F# dynamic receive apodization. 
This is implemented as: * @@ -130,24 +131,25 @@ float apodize(float arg) return a * a; } -vec2 rca_plane_projection(vec3 point, bool rows) +vec2 rca_plane_projection(const vec3 point, const bool rows) { vec2 result = vec2(point[int(rows)], point[2]); return result; } -float plane_wave_transmit_distance(vec3 point, float transmit_angle, bool tx_rows) +float plane_wave_transmit_distance(const vec3 point, const float transmit_angle, const bool tx_rows) { return dot(rca_plane_projection(point, tx_rows), vec2(sin(transmit_angle), cos(transmit_angle))); } -float cylindrical_wave_transmit_distance(vec3 point, float focal_depth, float transmit_angle, bool tx_rows) +float cylindrical_wave_transmit_distance(const vec3 point, const float focal_depth, + const float transmit_angle, const bool tx_rows) { vec2 f = focal_depth * vec2(sin(transmit_angle), cos(transmit_angle)); return distance(rca_plane_projection(point, tx_rows), f); } -float rca_transmit_distance(vec3 world_point, vec2 focal_vector, int transmit_receive_orientation) +float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const int transmit_receive_orientation) { bool tx_rows = (transmit_receive_orientation & TX_ORIENTATION_MASK) == 0; float transmit_angle = radians(focal_vector.x); @@ -162,8 +164,10 @@ float rca_transmit_distance(vec3 world_point, vec2 focal_vector, int transmit_re return result; } -RESULT_TYPE RCA_acquisition_range(vec3 world_point, int acquisition_start, int acquisition_end) +RESULT_TYPE RCA(const vec3 world_point) { + const int acquisition_start = fast? u_channel : 0; + const int acquisition_end = fast? 
u_channel + 1 : AcquisitionCount; RESULT_TYPE result = RESULT_TYPE(0); for (int acquisition = acquisition_start; acquisition < acquisition_end; acquisition++) { int transmit_receive_orientation = imageLoad(transmit_receive_orientations, acquisition).x; @@ -172,7 +176,7 @@ RESULT_TYPE RCA_acquisition_range(vec3 world_point, int acquisition_start, int a float transmit_distance = rca_transmit_distance(world_point, imageLoad(focal_vectors, acquisition).xy, transmit_receive_orientation); - for (int rx_channel = 0; rx_channel < channel_count; rx_channel++) { + for (int rx_channel = 0; rx_channel < ChannelCount; rx_channel++) { vec3 rx_center = vec3(rx_channel * xdc_element_pitch, 0); vec2 receive_vector = xdc_world_point - rca_plane_projection(rx_center, rx_rows); float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.y) * receive_vector.x); @@ -187,14 +191,11 @@ RESULT_TYPE RCA_acquisition_range(vec3 world_point, int acquisition_start, int a return result; } -RESULT_TYPE RCA(vec3 world_point) +RESULT_TYPE HERCULES(const vec3 world_point) { - if (fast) return RESULT_TYPE_CAST(RCA_acquisition_range(world_point, u_channel, u_channel + 1)); - else return RESULT_TYPE_CAST(RCA_acquisition_range(world_point, 0, int(acquisition_count))); -} + const int rx_channel_start = fast? u_channel : 0; + const int rx_channel_end = fast? 
u_channel + 1 : ChannelCount; -RESULT_TYPE HERCULES_receive_channel_range(vec3 world_point, int rx_channel_start, int rx_channel_end) -{ int transmit_receive_orientation = imageLoad(transmit_receive_orientations, 0).x; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; bool rx_cols = (transmit_receive_orientation & RX_ORIENTATION_MASK) != 0; @@ -202,7 +203,7 @@ RESULT_TYPE HERCULES_receive_channel_range(vec3 world_point, int rx_channel_star transmit_receive_orientation); RESULT_TYPE result = RESULT_TYPE(0); - for (int transmit = int(sparse); transmit < acquisition_count; transmit++) { + for (int transmit = int(sparse); transmit < AcquisitionCount; transmit++) { int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit; for (int rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) { vec3 element_position; @@ -213,7 +214,7 @@ RESULT_TYPE HERCULES_receive_channel_range(vec3 world_point, int rx_channel_star distance(xdc_world_point.xy, element_position.xy)); if (apodization > 0) { /* NOTE: tribal knowledge */ - if (transmit == 0) apodization *= inversesqrt(acquisition_count); + if (transmit == 0) apodization *= inversesqrt(AcquisitionCount); float sidx = sample_index(transmit_distance + distance(xdc_world_point, element_position)); SAMPLE_TYPE value = apodization * sample_rf(rx_channel, transmit, sidx); @@ -224,14 +225,11 @@ RESULT_TYPE HERCULES_receive_channel_range(vec3 world_point, int rx_channel_star return result; } -RESULT_TYPE HERCULES(vec3 world_point) +RESULT_TYPE FORCES(const vec3 world_point) { - if (fast) return RESULT_TYPE_CAST(HERCULES_receive_channel_range(world_point, u_channel, u_channel + 1)); - else return RESULT_TYPE_CAST(HERCULES_receive_channel_range(world_point, 0, int(channel_count))); -} + const int rx_channel_start = fast? u_channel : 0; + const int rx_channel_end = fast? 
u_channel + 1 : ChannelCount; -RESULT_TYPE FORCES_receive_channel_range(vec3 world_point, int rx_channel_start, int rx_channel_end) -{ RESULT_TYPE result = RESULT_TYPE(0); vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; for (int rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) { @@ -239,9 +237,9 @@ RESULT_TYPE FORCES_receive_channel_range(vec3 world_point, int rx_channel_start, float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) * (xdc_world_point.x - rx_channel * xdc_element_pitch.x)); if (apodization > 0) { - for (int transmit = int(sparse); transmit < acquisition_count; transmit++) { + for (int transmit = int(sparse); transmit < AcquisitionCount; transmit++) { int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit; - vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(channel_count / 2)), 0); + vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(ChannelCount / 2)), 0); float sidx = sample_index(distance(xdc_world_point, transmit_center) + receive_distance); SAMPLE_TYPE value = apodization * sample_rf(rx_channel, transmit, sidx); @@ -252,12 +250,6 @@ RESULT_TYPE FORCES_receive_channel_range(vec3 world_point, int rx_channel_start, return result; } -RESULT_TYPE FORCES(vec3 world_point) -{ - if (fast) return RESULT_TYPE_CAST(FORCES_receive_channel_range(world_point, u_channel, u_channel + 1)); - else return RESULT_TYPE_CAST(FORCES_receive_channel_range(world_point, 0, int(channel_count))); -} - void main() { ivec3 out_voxel = ivec3(gl_GlobalInvocationID); @@ -273,7 +265,7 @@ void main() vec3 world_point = (voxel_transform * vec4(out_voxel, 1)).xyz; - switch (shader_kind) { + switch (ShaderKind) { case ShaderKind_FORCES: case ShaderKind_UFORCES: { @@ -294,7 +286,7 @@ void main() #if (ShaderFlags & ShaderFlags_Fast) == 0 /* TODO(rnp): scale such that brightness remains ~constant */ - if (bool(shader_flags & 
ShaderFlags_CoherencyWeighting)) { + if (coherency_weighting) { float denominator = sum[RESULT_LAST_INDEX] + float(sum[RESULT_LAST_INDEX] == 0); RESULT_TYPE_CAST(sum) *= RESULT_TYPE_CAST(sum) / denominator; }