uforces/hercules: image is now computed in xdc space - ogl_beamforming - Ultrasound Beamforming Implemented with OpenGL

Commit: 4940910923dc59a36f6eaed57e6672e3d9d4c128
Parent: 9b0d67e1560e0a5c961192be8aea75bb72060585
Author: Randy Palamar
Date:   Mon, 16 Sep 2024 13:46:08 -0600

uforces/hercules: image is now computed in xdc space

This means that we compute a transform beforehand and shift the
image point accordingly. This transformation is calculated on the
CPU and sent to the GPU as a 3x3 matrix. This is probably better
than calculating it in every compute shader invocation but could
be measured.

Diffstat:
M beamformer.c  | 45 +++++++++++++++++++++++++++++++++++++--------
M beamformer.h  | 15 ++++++++-------
M main.c  | 2 ++
M shaders/hercules.glsl  | 29 ++++++++++++++++++-----------
M shaders/uforces.glsl  | 27 +++++++++++++++++----------
M util.c  | 37 +++++++++++++++++++++++++++++++++++++
M util.h  | 6 ++++++

7 files changed, 125 insertions(+), 36 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -103,6 +103,29 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
 	glNamedBufferStorage(cs->hadamard_ssbo, hadamard_elements * sizeof(i32), hadamard, 0);
 }
 
+static m3
+observation_direction_to_xdc_space(v3 direction, BeamformerParameters *bp, u32 idx)
+{
+	/* TODO: multiple xdc support */
+	(void)idx;
+
+	v3 edge1      = sub_v3(bp->xdc_corner1.xyz, bp->xdc_origin.xyz);
+	v3 edge2      = sub_v3(bp->xdc_corner2.xyz, bp->xdc_origin.xyz);
+	v3 xdc_normal = cross(edge1, edge2);
+	xdc_normal.z  = ABS(xdc_normal.z);
+
+	v3 e1 = normalize_v3(sub_v3(xdc_normal, direction));
+	v3 e2 = {.y = 1};
+	v3 e3 = normalize_v3(cross(e2, e1));
+
+	m3 result = {
+		.c[0] = (v3){.x = e3.x, .y = e2.x, .z = e1.x},
+		.c[1] = (v3){.x = e3.y, .y = e2.y, .z = e1.y},
+		.c[2] = (v3){.x = e3.z, .y = e2.z, .z = e1.z},
+	};
+	return result;
+}
+
 static b32
 do_volume_computation_step(BeamformerCtx *ctx, enum compute_shaders shader)
 {
@@ -224,14 +247,20 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
 		glUniform3iv(csctx->volume_export_dim_offset_id, 1, (i32 []){0, 0, 0});
 		glUniform1i(csctx->volume_export_pass_id, 0);
-		glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit);
-		glBindTexture(GL_TEXTURE_3D, ctx->out_texture);
-		glBindImageTexture(ctx->out_texture_unit, ctx->out_texture, 0, GL_TRUE, 0,
-		                   GL_WRITE_ONLY, GL_RG32F);
-		glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit);
-		glDispatchCompute(ORONE(ctx->out_data_dim.x / 32),
-		                  ctx->out_data_dim.y,
-		                  ORONE(ctx->out_data_dim.z / 32));
+
+		{
+			BeamformerParameters *bp = &ctx->params->raw;
+			m3 xdc_transform = observation_direction_to_xdc_space((v3){.z = 1}, bp, 0);
+			glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit);
+			glBindTexture(GL_TEXTURE_3D, ctx->out_texture);
+			glBindImageTexture(ctx->out_texture_unit, ctx->out_texture, 0, GL_TRUE, 0,
+			                   GL_WRITE_ONLY, GL_RG32F);
+			glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit);
+			glUniformMatrix3fv(csctx->xdc_transform_id, 1, GL_FALSE, xdc_transform.E);
+			glDispatchCompute(ORONE(ctx->out_data_dim.x / 32),
+			                  ctx->out_data_dim.y,
+			                  ORONE(ctx->out_data_dim.z / 32));
+		}
 		break;
 	default: ASSERT(0);
 	}
diff --git a/beamformer.h b/beamformer.h
@@ -36,13 +36,6 @@ typedef union {
 } v2;
 
 typedef union {
-	struct { f32 x, y, z; };
-	struct { f32 w, h, d; };
-	f32 E[3];
-	Vector3 rl;
-} v3;
-
-typedef union {
 	struct { f32 x, y, z, w; };
 	struct { f32 r, g, b, a; };
 	struct { v3 xyz; f32 _1; };
@@ -53,6 +46,12 @@ typedef union {
 } v4;
 
 typedef union {
+	struct { v3 x, y, z; };
+	v3  c[3];
+	f32 E[9];
+} m3;
+
+typedef union {
 	struct { v2 pos, size; };
 	Rectangle rl;
 } Rect;
@@ -187,11 +186,13 @@ typedef struct {
 
 	uv4 dec_data_dim;
 	uv2 rf_raw_dim;
+
 	i32 out_data_tex_id;
 	i32 mip_view_tex_id;
 	i32 mips_level_id;
 	i32 volume_export_pass_id;
 	i32 volume_export_dim_offset_id;
+	i32 xdc_transform_id;
 } ComputeShaderCtx;
 
 typedef struct {
diff --git a/main.c b/main.c
@@ -123,6 +123,8 @@ reload_shaders(BeamformerCtx *ctx, Arena a)
 	                                                          "u_volume_export_pass");
 	csctx->volume_export_dim_offset_id = glGetUniformLocation(csctx->programs[CS_HERCULES],
 	                                                         "u_volume_export_dim_offset");
+	csctx->xdc_transform_id            = glGetUniformLocation(csctx->programs[CS_UFORCES],
+	                                                          "u_xdc_transform");
 
 	csctx->out_data_tex_id = glGetUniformLocation(csctx->programs[CS_UFORCES], "u_out_data_tex");
 	csctx->mip_view_tex_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_view_tex");
diff --git a/shaders/hercules.glsl b/shaders/hercules.glsl
@@ -35,6 +35,7 @@ layout(r32f,  location = 2) uniform writeonly image3D u_out_volume_tex;
 
 layout(location = 3) uniform int   u_volume_export_pass;
 layout(location = 4) uniform ivec3 u_volume_export_dim_offset;
+layout(location = 5) uniform mat3  u_xdc_transform;
 
 #define C_SPLINE 0.5
 
@@ -73,27 +74,33 @@ vec2 cubic(uint ridx, float x)
 }
 #endif
 
-void main()
+vec3 calc_image_point(vec3 voxel)
 {
-	vec3  voxel        = vec3(gl_GlobalInvocationID.xyz)  + vec3(u_volume_export_dim_offset);
-	ivec3 out_coord    = ivec3(gl_GlobalInvocationID.xyz) + u_volume_export_dim_offset;
 	ivec3 out_data_dim;
-
 	if (u_volume_export_pass == 0) out_data_dim = imageSize(u_out_data_tex);
 	else                           out_data_dim = imageSize(u_out_volume_tex);
 
-	/* NOTE: Convert pixel to physical coordinates */
-	vec4 xdc_size      = xdc_corner1 + xdc_corner2 - xdc_origin;
-	vec3 edge1         = xdc_corner1.xyz - xdc_origin.xyz;
-	vec3 edge2         = xdc_corner2.xyz - xdc_origin.xyz;
-	vec3 xdc_normal    = cross(edge2, edge1);
-	xdc_normal        /= length(xdc_normal);
 	vec4 output_size   = abs(output_max_coord - output_min_coord);
 	vec3 image_point   = output_min_coord.xyz + voxel * output_size.xyz / out_data_dim.xyz;
 
 	if (u_volume_export_pass == 0)
 		image_point.y = off_axis_pos;
 
+	/* NOTE: move the image point into xdc space */
+	image_point = u_xdc_transform * image_point;
+	return image_point;
+}
+
+void main()
+{
+	vec3  voxel        = vec3(gl_GlobalInvocationID.xyz)  + vec3(u_volume_export_dim_offset);
+	ivec3 out_coord    = ivec3(gl_GlobalInvocationID.xyz) + u_volume_export_dim_offset;
+
+	/* NOTE: Convert pixel to physical coordinates */
+	vec3 edge1         = xdc_corner1.xyz - xdc_origin.xyz;
+	vec3 edge2         = xdc_corner2.xyz - xdc_origin.xyz;
+	vec3 image_point   = calc_image_point(voxel);
+
 	/* NOTE: used for constant F# dynamic receive apodization. This is implemented as:
 	 *
 	 *                  /        |x_e - x_i|\
@@ -101,7 +108,7 @@ void main()
 	 *                  \        |z_e - z_i|/
 	 *
 	 * where x,z_e are transducer element positions and x,z_i are image positions. */
-	float f_num    = output_size.z / output_size.x;
+	float f_num    = 0.5;
 	float apod_arg = f_num * 0.5 * radians(360) / abs(image_point.z);
 
 	/* NOTE: for I-Q data phase correction */
diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl
@@ -35,6 +35,7 @@ layout(r32f,  location = 2) uniform writeonly image3D u_out_volume_tex;
 
 layout(location = 3) uniform int   u_volume_export_pass;
 layout(location = 4) uniform ivec3 u_volume_export_dim_offset;
+layout(location = 5) uniform mat3  u_xdc_transform;
 
 #define C_SPLINE 0.5
 
@@ -70,23 +71,29 @@ vec2 cubic(uint ridx, float x)
 }
 #endif
 
+vec3 calc_image_point(vec3 voxel)
+{
+	ivec3 out_data_dim = imageSize(u_out_data_tex);
+	vec4 output_size   = abs(output_max_coord - output_min_coord);
+	vec3 image_point   = output_min_coord.xyz + voxel * output_size.xyz / out_data_dim.xyz;
+
+	/* TODO: fix the math so that the image plane can be aritrary */
+	image_point.y = 0;
+
+	/* NOTE: move the image point into xdc space */
+	image_point = u_xdc_transform * image_point;
+	return image_point;
+}
+
 void main()
 {
 	vec3  voxel        = vec3(gl_GlobalInvocationID);
 	ivec3 out_coord    = ivec3(gl_GlobalInvocationID);
-	ivec3 out_data_dim = imageSize(u_out_data_tex);
 
 	/* NOTE: Convert voxel to physical coordinates */
-	vec4 xdc_size      = xdc_corner1 + xdc_corner2 - xdc_origin;
 	vec3 edge1         = xdc_corner1.xyz - xdc_origin.xyz;
 	vec3 edge2         = xdc_corner2.xyz - xdc_origin.xyz;
-	vec3 xdc_normal    = cross(edge2, edge1);
-	xdc_normal        /= length(xdc_normal);
-	vec4 output_size   = abs(output_max_coord - output_min_coord);
-	vec3 image_point   = output_min_coord.xyz + voxel * output_size.xyz / out_data_dim.xyz;
-
-	/* TODO: fix the math so that the image plane can be aritrary */
-	image_point.y = 0;
+	vec3 image_point   = calc_image_point(voxel);
 
 	/* NOTE: used for constant F# dynamic receive apodization. This is implemented as:
 	 *
@@ -95,7 +102,7 @@ void main()
 	 *                  \        |z_e - z_i|/
 	 *
 	 * where x,z_e are transducer element positions and x,z_i are image positions. */
-	float f_num    = 0.5; //output_size.z / output_size.x;
+	float f_num    = 0.5;
 	float apod_arg = f_num * 0.5 * radians(360) / abs(image_point.z);
 
 	/* NOTE: for I-Q data phase correction */
diff --git a/util.c b/util.c
@@ -64,6 +64,43 @@ round_down_power_of_2(u32 a)
 	return result;
 }
 
+static v3
+cross(v3 a, v3 b)
+{
+	v3 result = {
+		.x = a.y * b.z - a.z * b.y,
+		.y = a.z * b.x - a.x * b.z,
+		.z = a.x * b.y - a.y * b.x,
+	};
+	return result;
+}
+
+static v3
+sub_v3(v3 a, v3 b)
+{
+	v3 result = {
+		.x = a.x - b.x,
+		.y = a.y - b.y,
+		.z = a.z - b.z,
+	};
+	return result;
+}
+
+static f32
+length_v3(v3 a)
+{
+	f32 result = a.x * a.x + a.y * a.y + a.z * a.z;
+	return result;
+}
+
+static v3
+normalize_v3(v3 a)
+{
+	f32 length = length_v3(a);
+	v3 result = {.x = a.x / length, .y = a.y / length, .z = a.z / length};
+	return result;
+}
+
 static void
 fill_hadamard(i32 *m, u32 dim)
 {
diff --git a/util.h b/util.h
@@ -64,6 +64,12 @@ typedef union {
 	u32 E[4];
 } uv4;
 
+typedef union {
+	struct { f32 x, y, z; };
+	struct { f32 w, h, d; };
+	f32 E[3];
+} v3;
+
 #include "util.c"
 
 #endif /* _UTIL_H_ */

M	beamformer.c	\|	45	+++++++++++++++++++++++++++++++++++++--------
M	beamformer.h	\|	15	++++++++-------
M	main.c	\|	2	++
M	shaders/hercules.glsl	\|	29	++++++++++++++++++-----------
M	shaders/uforces.glsl	\|	27	+++++++++++++++++----------
M	util.c	\|	37	+++++++++++++++++++++++++++++++++++++
M	util.h	\|	6	++++++