ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: 4940910923dc59a36f6eaed57e6672e3d9d4c128
Parent: 9b0d67e1560e0a5c961192be8aea75bb72060585
Author: Randy Palamar
Date:   Mon, 16 Sep 2024 13:46:08 -0600

uforces/hercules: image is now computed in xdc space

This means that we compute a transform beforehand and shift the
image point accordingly. This transformation is calculated on the
CPU and sent to the GPU as a 3x3 matrix. This is probably better
than calculating it in every compute shader invocation but could
be measured.

Diffstat:
Mbeamformer.c | 45+++++++++++++++++++++++++++++++++++++--------
Mbeamformer.h | 15++++++++-------
Mmain.c | 2++
Mshaders/hercules.glsl | 29++++++++++++++++++-----------
Mshaders/uforces.glsl | 27+++++++++++++++++----------
Mutil.c | 37+++++++++++++++++++++++++++++++++++++
Mutil.h | 6++++++
7 files changed, 125 insertions(+), 36 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -103,6 +103,29 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a) glNamedBufferStorage(cs->hadamard_ssbo, hadamard_elements * sizeof(i32), hadamard, 0); } +static m3 +observation_direction_to_xdc_space(v3 direction, BeamformerParameters *bp, u32 idx) +{ + /* TODO: multiple xdc support */ + (void)idx; + + v3 edge1 = sub_v3(bp->xdc_corner1.xyz, bp->xdc_origin.xyz); + v3 edge2 = sub_v3(bp->xdc_corner2.xyz, bp->xdc_origin.xyz); + v3 xdc_normal = cross(edge1, edge2); + xdc_normal.z = ABS(xdc_normal.z); + + v3 e1 = normalize_v3(sub_v3(xdc_normal, direction)); + v3 e2 = {.y = 1}; + v3 e3 = normalize_v3(cross(e2, e1)); + + m3 result = { + .c[0] = (v3){.x = e3.x, .y = e2.x, .z = e1.x}, + .c[1] = (v3){.x = e3.y, .y = e2.y, .z = e1.y}, + .c[2] = (v3){.x = e3.z, .y = e2.z, .z = e1.z}, + }; + return result; +} + static b32 do_volume_computation_step(BeamformerCtx *ctx, enum compute_shaders shader) { @@ -224,14 +247,20 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]); glUniform3iv(csctx->volume_export_dim_offset_id, 1, (i32 []){0, 0, 0}); glUniform1i(csctx->volume_export_pass_id, 0); - glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit); - glBindTexture(GL_TEXTURE_3D, ctx->out_texture); - glBindImageTexture(ctx->out_texture_unit, ctx->out_texture, 0, GL_TRUE, 0, - GL_WRITE_ONLY, GL_RG32F); - glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit); - glDispatchCompute(ORONE(ctx->out_data_dim.x / 32), - ctx->out_data_dim.y, - ORONE(ctx->out_data_dim.z / 32)); + + { + BeamformerParameters *bp = &ctx->params->raw; + m3 xdc_transform = observation_direction_to_xdc_space((v3){.z = 1}, bp, 0); + glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit); + glBindTexture(GL_TEXTURE_3D, ctx->out_texture); + glBindImageTexture(ctx->out_texture_unit, ctx->out_texture, 0, GL_TRUE, 0, + GL_WRITE_ONLY, GL_RG32F); + glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit); + glUniformMatrix3fv(csctx->xdc_transform_id, 1, GL_FALSE, xdc_transform.E); + glDispatchCompute(ORONE(ctx->out_data_dim.x / 32), + ctx->out_data_dim.y, + ORONE(ctx->out_data_dim.z / 32)); + } break; default: ASSERT(0); } diff --git a/beamformer.h b/beamformer.h @@ -36,13 +36,6 @@ typedef union { } v2; typedef union { - struct { f32 x, y, z; }; - struct { f32 w, h, d; }; - f32 E[3]; - Vector3 rl; -} v3; - -typedef union { struct { f32 x, y, z, w; }; struct { f32 r, g, b, a; }; struct { v3 xyz; f32 _1; }; @@ -53,6 +46,12 @@ typedef union { } v4; typedef union { + struct { v3 x, y, z; }; + v3 c[3]; + f32 E[9]; +} m3; + +typedef union { struct { v2 pos, size; }; Rectangle rl; } Rect; @@ -187,11 +186,13 @@ typedef struct { uv4 dec_data_dim; uv2 rf_raw_dim; + i32 out_data_tex_id; i32 mip_view_tex_id; i32 mips_level_id; i32 volume_export_pass_id; i32 volume_export_dim_offset_id; + i32 xdc_transform_id; } ComputeShaderCtx; typedef struct { diff --git a/main.c b/main.c @@ -123,6 +123,8 @@ reload_shaders(BeamformerCtx *ctx, Arena a) "u_volume_export_pass"); csctx->volume_export_dim_offset_id = glGetUniformLocation(csctx->programs[CS_HERCULES], "u_volume_export_dim_offset"); + csctx->xdc_transform_id = glGetUniformLocation(csctx->programs[CS_UFORCES], + "u_xdc_transform"); csctx->out_data_tex_id = glGetUniformLocation(csctx->programs[CS_UFORCES], "u_out_data_tex"); csctx->mip_view_tex_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_view_tex"); diff --git a/shaders/hercules.glsl b/shaders/hercules.glsl @@ -35,6 +35,7 @@ layout(r32f, location = 2) uniform writeonly image3D u_out_volume_tex; layout(location = 3) uniform int u_volume_export_pass; layout(location = 4) uniform ivec3 u_volume_export_dim_offset; +layout(location = 5) uniform mat3 u_xdc_transform; #define C_SPLINE 0.5 @@ -73,27 +74,33 @@ vec2 cubic(uint ridx, float x) } #endif -void main() +vec3 calc_image_point(vec3 voxel) { - vec3 voxel = vec3(gl_GlobalInvocationID.xyz) + vec3(u_volume_export_dim_offset); - ivec3 out_coord = ivec3(gl_GlobalInvocationID.xyz) + u_volume_export_dim_offset; ivec3 out_data_dim; - if (u_volume_export_pass == 0) out_data_dim = imageSize(u_out_data_tex); else out_data_dim = imageSize(u_out_volume_tex); - /* NOTE: Convert pixel to physical coordinates */ - vec4 xdc_size = xdc_corner1 + xdc_corner2 - xdc_origin; - vec3 edge1 = xdc_corner1.xyz - xdc_origin.xyz; - vec3 edge2 = xdc_corner2.xyz - xdc_origin.xyz; - vec3 xdc_normal = cross(edge2, edge1); - xdc_normal /= length(xdc_normal); vec4 output_size = abs(output_max_coord - output_min_coord); vec3 image_point = output_min_coord.xyz + voxel * output_size.xyz / out_data_dim.xyz; if (u_volume_export_pass == 0) image_point.y = off_axis_pos; + /* NOTE: move the image point into xdc space */ + image_point = u_xdc_transform * image_point; + return image_point; +} + +void main() +{ + vec3 voxel = vec3(gl_GlobalInvocationID.xyz) + vec3(u_volume_export_dim_offset); + ivec3 out_coord = ivec3(gl_GlobalInvocationID.xyz) + u_volume_export_dim_offset; + + /* NOTE: Convert pixel to physical coordinates */ + vec3 edge1 = xdc_corner1.xyz - xdc_origin.xyz; + vec3 edge2 = xdc_corner2.xyz - xdc_origin.xyz; + vec3 image_point = calc_image_point(voxel); + /* NOTE: used for constant F# dynamic receive apodization. This is implemented as: * * / |x_e - x_i|\ @@ -101,7 +108,7 @@ void main() * \ |z_e - z_i|/ * * where x,z_e are transducer element positions and x,z_i are image positions. */ - float f_num = output_size.z / output_size.x; + float f_num = 0.5; float apod_arg = f_num * 0.5 * radians(360) / abs(image_point.z); /* NOTE: for I-Q data phase correction */ diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl @@ -35,6 +35,7 @@ layout(r32f, location = 2) uniform writeonly image3D u_out_volume_tex; layout(location = 3) uniform int u_volume_export_pass; layout(location = 4) uniform ivec3 u_volume_export_dim_offset; +layout(location = 5) uniform mat3 u_xdc_transform; #define C_SPLINE 0.5 @@ -70,23 +71,29 @@ vec2 cubic(uint ridx, float x) } #endif +vec3 calc_image_point(vec3 voxel) +{ + ivec3 out_data_dim = imageSize(u_out_data_tex); + vec4 output_size = abs(output_max_coord - output_min_coord); + vec3 image_point = output_min_coord.xyz + voxel * output_size.xyz / out_data_dim.xyz; + + /* TODO: fix the math so that the image plane can be aritrary */ + image_point.y = 0; + + /* NOTE: move the image point into xdc space */ + image_point = u_xdc_transform * image_point; + return image_point; +} + void main() { vec3 voxel = vec3(gl_GlobalInvocationID); ivec3 out_coord = ivec3(gl_GlobalInvocationID); - ivec3 out_data_dim = imageSize(u_out_data_tex); /* NOTE: Convert voxel to physical coordinates */ - vec4 xdc_size = xdc_corner1 + xdc_corner2 - xdc_origin; vec3 edge1 = xdc_corner1.xyz - xdc_origin.xyz; vec3 edge2 = xdc_corner2.xyz - xdc_origin.xyz; - vec3 xdc_normal = cross(edge2, edge1); - xdc_normal /= length(xdc_normal); - vec4 output_size = abs(output_max_coord - output_min_coord); - vec3 image_point = output_min_coord.xyz + voxel * output_size.xyz / out_data_dim.xyz; - - /* TODO: fix the math so that the image plane can be aritrary */ - image_point.y = 0; + vec3 image_point = calc_image_point(voxel); /* NOTE: used for constant F# dynamic receive apodization. This is implemented as: * @@ -95,7 +102,7 @@ void main() * \ |z_e - z_i|/ * * where x,z_e are transducer element positions and x,z_i are image positions. */ - float f_num = 0.5; //output_size.z / output_size.x; + float f_num = 0.5; float apod_arg = f_num * 0.5 * radians(360) / abs(image_point.z); /* NOTE: for I-Q data phase correction */ diff --git a/util.c b/util.c @@ -64,6 +64,43 @@ round_down_power_of_2(u32 a) return result; } +static v3 +cross(v3 a, v3 b) +{ + v3 result = { + .x = a.y * b.z - a.z * b.y, + .y = a.z * b.x - a.x * b.z, + .z = a.x * b.y - a.y * b.x, + }; + return result; +} + +static v3 +sub_v3(v3 a, v3 b) +{ + v3 result = { + .x = a.x - b.x, + .y = a.y - b.y, + .z = a.z - b.z, + }; + return result; +} + +static f32 +length_v3(v3 a) +{ + f32 result = a.x * a.x + a.y * a.y + a.z * a.z; + return result; +} + +static v3 +normalize_v3(v3 a) +{ + f32 length = length_v3(a); + v3 result = {.x = a.x / length, .y = a.y / length, .z = a.z / length}; + return result; +} + static void fill_hadamard(i32 *m, u32 dim) { diff --git a/util.h b/util.h @@ -64,6 +64,12 @@ typedef union { u32 E[4]; } uv4; +typedef union { + struct { f32 x, y, z; }; + struct { f32 w, h, d; }; + f32 E[3]; +} v3; + #include "util.c" #endif /* _UTIL_H_ */