Commit: 5c644e1497edea66e809f1d327031c72910d9d06
Parent: b43c2936d5df2d6cdf3a2e605a3defd3dbd58ce7
Author: Randy Palamar
Date: Thu, 26 Mar 2026 13:47:23 -0600
shaders/das: remove incredibly stupid instruction dependency in cubic
We already needed to calculate the integer portion of the sample
index to check if it was in bounds for the current (transmit,
receive) pair. By using the integer return from modf() in the
address calculation instead of the value we already calculated we
force the GPU to finish executing the modf() before issuing the
load. Even if we just base the execution on the shader disassembly
we can see that the load now comes before the instruction used to
calculate the fractional portion of sample index (the used
instruction is different in OpenGL vs Vulkan idk why). This means
the GPU has more math ops to shove between the sample loading and
sample usage which improves latency hiding.
The performance boost from this in OpenGL is a bit of a wash but
it is consistent in Vulkan (up to ~5%).
Diffstat:
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/shaders/das.glsl b/shaders/das.glsl
@@ -60,7 +60,7 @@ vec2 rotate_iq(const vec2 iq, const float time)
#endif
/* NOTE: See: https://cubic.org/docs/hermite.htm */
-SAMPLE_TYPE cubic(const int base_index, const float index)
+SAMPLE_TYPE cubic(const int base_index, const float t)
{
const mat4 h = mat4(
2, -3, 0, 1,
@@ -69,12 +69,11 @@ SAMPLE_TYPE cubic(const int base_index, const float index)
1, -1, 0, 0
);
- float tk, t = modf(index, tk);
SAMPLE_TYPE samples[4] = {
- rf_data[base_index + int(tk) - 1],
- rf_data[base_index + int(tk) + 0],
- rf_data[base_index + int(tk) + 1],
- rf_data[base_index + int(tk) + 2],
+ rf_data[base_index + 0],
+ rf_data[base_index + 1],
+ rf_data[base_index + 2],
+ rf_data[base_index + 3],
};
vec4 S = vec4(t * t * t, t * t, t, 1);
@@ -111,8 +110,10 @@ SAMPLE_TYPE sample_rf(const int channel, const int transmit, const float index)
result = rotate_iq(result, index / SamplingFrequency);
}break;
case InterpolationMode_Cubic:{
- if (index >= 0 && (int(index) + 2) < SampleCount)
- result = rotate_iq(cubic(base_index, index), index / SamplingFrequency);
+ if (index > 0 && int(index) < SampleCount - 2) {
+ float tk, t = modf(index, tk);
+ result = rotate_iq(cubic(base_index + int(index) - 1, t), index / SamplingFrequency);
+ }
}break;
}
return result;