ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 341f659f3c4b6f00459b26d8befc9c1ed3093d29
Parent: 247202ca0919c3f44a47c22f7b049c7a97060090
Author: Randy Palamar
Date:   Wed,  1 Apr 2026 11:19:40 -0600

shaders/das: move constants out of FORCES loop

There were a handful of calculations which remain constant
throughout each portion of the FORCES loop. You might hope that
the compiler is smart enough to move them out, but it is not. I
have moved every constant I could see as far up as it can go. This
included lifting portions of the transmit distance calculation
(the one that runs 16K times per pixel) out of both loops. I also
pre-baked the (-1) that cubic interpolation applies to the sample
index into the rf data offset, which is the scalar (in terms of
register usage) portion of the index; it is only subtracted when
cubic interpolation is active.

This results in a significant performance boost for FORCES DAS of
up to 10%.

The other methods also have some constants which need to be moved
up. That will need to be done later.

Diffstat:
M shaders/das.glsl | 31 ++++++++++++++++++++-----------
1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -111,7 +111,7 @@ SAMPLE_TYPE sample_rf(const int rf_offset, const float index) case InterpolationMode_Cubic:{ if (index > 0 && int(index) < SampleCount - 2) { float tk, t = modf(index, tk); - result = rotate_iq(cubic(rf_offset + int(index) - 1, t), index / SamplingFrequency); + result = rotate_iq(cubic(rf_offset + int(index), t), index / SamplingFrequency); } }break; } @@ -199,7 +199,8 @@ RESULT_TYPE RCA(const vec3 world_point) vec2 xdc_world_point = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows); float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation); - int rf_offset = acquisition * SampleCount; + int rf_offset = acquisition * SampleCount; + rf_offset -= int(InterpolationMode == InterpolationMode_Cubic); for (int rx_channel = 0; rx_channel < ChannelCount; rx_channel++) { vec3 rx_center = vec3(rx_channel * xdc_element_pitch, 0); vec2 receive_vector = xdc_world_point - rca_plane_projection(rx_center, rx_rows); @@ -228,6 +229,7 @@ RESULT_TYPE HERCULES(const vec3 world_point) for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) { int tx_channel = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit; int rf_offset = transmit * SampleCount; + rf_offset -= int(InterpolationMode == InterpolationMode_Cubic); #if Fast const int rx_channel = u_channel; rf_offset += rx_channel * SampleCount * AcquisitionCount; @@ -263,20 +265,27 @@ RESULT_TYPE FORCES(const vec3 xdc_world_point) const int rx_channel_end = bool(Fast)? 
u_channel + 1 : ChannelCount; RESULT_TYPE result = RESULT_TYPE(0); + + float z_delta_squared = xdc_world_point.z * xdc_world_point.z; + float transmit_y_delta = xdc_world_point.y - xdc_element_pitch.y * ChannelCount / 2; + float transmit_yz_squared = transmit_y_delta * transmit_y_delta + z_delta_squared; + for (int rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) { - float receive_distance = distance(xdc_world_point.xz, vec2(rx_channel * xdc_element_pitch.x, 0)); - float a_arg = abs(FNumber * (xdc_world_point.x - rx_channel * xdc_element_pitch.x) / - abs(xdc_world_point.z)); + float receive_x_delta = xdc_world_point.x - rx_channel * xdc_element_pitch.x; + float a_arg = abs(FNumber * receive_x_delta / xdc_world_point.z); if (a_arg < 0.5f) { - int rf_offset = rx_channel * SampleCount * AcquisitionCount + Sparse * SampleCount; - float apodization = apodize(a_arg); + int rf_offset = rx_channel * SampleCount * AcquisitionCount + Sparse * SampleCount; + rf_offset -= int(InterpolationMode == InterpolationMode_Cubic); + + float receive_index = sample_index(sqrt(receive_x_delta * receive_x_delta + z_delta_squared)); + float apodization = apodize(a_arg); for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) { - int tx_channel = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit; - vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(ChannelCount / 2)), 0); + int tx_channel = bool(Sparse) ? 
imageLoad(sparse_elements, transmit - Sparse).x : transmit; + float transmit_x_delta = xdc_world_point.x - xdc_element_pitch.x * tx_channel; + float transmit_index = sqrt(transmit_yz_squared + transmit_x_delta * transmit_x_delta) * SamplingFrequency / SpeedOfSound; - float sidx = sample_index(distance(xdc_world_point, transmit_center) + receive_distance); - SAMPLE_TYPE value = apodization * sample_rf(rf_offset, sidx); + SAMPLE_TYPE value = apodization * sample_rf(rf_offset, receive_index + transmit_index); result += RESULT_STORE(value, length(value)); rf_offset += SampleCount; }