shaders/filter: cleanup casting noise and length obfuscation - ogl_beamforming - Ultrasound Beamforming Implemented with OpenGL

Commit: e31466cec4aab2bef07643414e1d20d21a95b9e7
Parent: 2ef2a8e9a188fd2b25c7ccc484195fde69d8031e
Author: Randy Palamar
Date:   Sun,  2 Nov 2025 06:34:35 -0700

shaders/filter: cleanup casting noise and length obfuscation

also the LDS array had one extra element

Diffstat:
M shaders/filter.glsl  | 30 ++++++++++++++----------------

1 file changed, 14 insertions(+), 16 deletions(-)
diff --git a/shaders/filter.glsl b/shaders/filter.glsl
@@ -41,7 +41,7 @@ vec2 complex_mul(vec2 a, vec2 b)
 }
 
 #if Demodulate
-vec2 rotate_iq(vec2 iq, int index)
+vec2 rotate_iq(vec2 iq, uint index)
 {
 	vec2 result;
 	switch (SamplingMode) {
@@ -51,7 +51,7 @@ vec2 rotate_iq(vec2 iq, int index)
 		// cos -> 1 -1  1 -1
 		// sin -> 0  0  0  0
 		const float scales[2] = {1, -1};
-		result = scales[index & 1] * iq;
+		result = scales[index & 1u] * iq;
 	}break;
 	case SamplingMode_2X:{
 		// fs  = fd
@@ -77,7 +77,7 @@ SAMPLE_TYPE sample_rf(uint index)
 	return result;
 }
 
-shared SAMPLE_TYPE rf[FilterLength + gl_WorkGroupSize.x];
+shared SAMPLE_TYPE rf[FilterLength + gl_WorkGroupSize.x - 1];
 
 void main()
 {
@@ -91,23 +91,21 @@ void main()
 	                  OutputTransmitStride * transmit +
 	                  OutputSampleStride   * out_sample;
 
-	int thread_index = int(gl_LocalInvocationIndex);
-	int thread_count = int(gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z);
+	uint thread_index = gl_LocalInvocationIndex;
+	uint thread_count = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;
 	/////////////////////////
 	// NOTE: sample caching
 	{
-		int min_sample = DecimationRate * int((gl_WorkGroupID.x + 0) * gl_WorkGroupSize.x) - (FilterLength - 1);
-		int max_sample = DecimationRate * int((gl_WorkGroupID.x + 1) * gl_WorkGroupSize.x);
+		in_offset += DecimationRate * gl_WorkGroupID.x * gl_WorkGroupSize.x - (FilterLength - 1);
 
-		in_offset += min_sample;
-		int total_samples       = max_sample - min_sample;
-		int samples_per_thread  = total_samples / thread_count;
-		int leftover_count      = total_samples % thread_count;
-		int samples_this_thread = samples_per_thread + int(thread_index < leftover_count);
+		uint total_samples       = rf.length();
+		uint samples_per_thread  = total_samples / thread_count;
+		uint leftover_count      = total_samples % thread_count;
+		uint samples_this_thread = samples_per_thread + uint(thread_index < leftover_count);
 
 		const float scale = bool(ComplexFilter) ? 1 : sqrt(2);
-		for (int i = 0; i < samples_this_thread; i++) {
-			int index = thread_count * i + thread_index;
+		for (uint i = 0; i < samples_this_thread; i++) {
+			uint index = thread_count * i + thread_index;
 			if (gl_WorkGroupID.x == 0 && index < FilterLength) {
 				rf[index] = SAMPLE_TYPE(0);
 			} else {
@@ -123,8 +121,8 @@ void main()
 
 	if (out_sample < SampleCount / DecimationRate) {
 		SAMPLE_TYPE result = SAMPLE_TYPE(0);
-		int offset = DecimationRate * thread_index;
-		for (int j = 0; j < FilterLength; j++)
+		uint offset = DecimationRate * thread_index;
+		for (uint j = 0; j < FilterLength; j++)
 			result += apply_filter(rf[offset + j], filter_coefficients[j]);
 		out_data[out_offset] = RESULT_TYPE_CAST(result);
 	}