ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: e19eef9f2ab1d314dd2cc5c028e5f8397e758d27
Parent: bc05b689c633829751fdd6ba20cfc6fa62de1828
Author: Randy Palamar
Date:   Thu, 31 Oct 2024 10:01:58 -0600

move shader parameters ubo to static s8

editing this in so many places was getting error prone and tedious

Diffstat:
Mbeamformer_parameters.h | 25+++++++++++++++++++++++++
Mbuild.sh | 2+-
Mhelpers/ogl_beamformer_lib.h | 4++++
Mshaders/demod.glsl | 23-----------------------
Mshaders/hadamard.glsl | 23-----------------------
Mshaders/hercules.glsl | 23-----------------------
Mshaders/uforces.glsl | 23-----------------------
Mstatic.c | 34+++++++++++++++++++++++-----------
Mutil.c | 25++++++++++++++++++++++++-
9 files changed, 77 insertions(+), 105 deletions(-)

diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -35,3 +35,28 @@ typedef struct { i32 beamform_plane; /* Plane to Beamform in 2D HERCULES */ f32 _pad[1]; } BeamformerParameters; + +static s8 g_compute_shader_header = s8("\ +#version 460 core\n\ +\n\ +layout(std140, binding = 0) uniform parameters {\n\ + uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */\n\ + uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */\n\ + vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */\n\ + vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */\n\ + vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */\n\ + uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */\n\ + uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */\n\ + vec4 output_min_coord; /* [m] Top left corner of output region */\n\ + vec4 output_max_coord; /* [m] Bottom right corner of output region */\n\ + uvec2 rf_raw_dim; /* Raw Data Dimensions */\n\ + uint xdc_count; /* Number of Transducer Arrays (4 max) */\n\ + uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */\n\ + float speed_of_sound; /* [m/s] */\n\ + float sampling_frequency; /* [Hz] */\n\ + float center_frequency; /* [Hz] */\n\ + float focal_depth; /* [m] */\n\ + float time_offset; /* pulse length correction time [s] */\n\ + float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */\n\ + int beamform_plane; /* Plane to Beamform in 2D HERCULES */\n\ +};\n\n"); diff --git a/build.sh b/build.sh @@ -4,7 +4,7 @@ set -e cflags="-march=native -std=c11 -O3 -Wall -I./external/include" #cflags="${cflags} -fproc-stat-report" #cflags="${cflags} -Rpass-missed=.*" -libcflags="$cflags -fPIC -shared" +libcflags="$cflags -fPIC -shared -Wno-unused-variable" ldflags="-lm" debug=${DEBUG} diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h @@ -14,6 +14,10 @@ typedef float f32; typedef double f64; typedef ptrdiff_t size; +#define ARRAY_COUNT(a) (sizeof(a) / sizeof(*a)) +typedef struct { size len; u8 *data; } s8; +#define s8(s) (s8){.len = ARRAY_COUNT(s) - 1, .data = (u8 *)s} + #if defined(_WIN32) #define LIB_FN __declspec(dllexport) #else diff --git a/shaders/demod.glsl b/shaders/demod.glsl @@ -1,5 +1,4 @@ /* See LICENSE for license details. */ -#version 460 core layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; layout(std430, binding = 1) readonly restrict buffer buffer_1 { @@ -14,28 +13,6 @@ layout(std430, binding = 3) readonly restrict buffer buffer_3 { float filter_coefficients[]; }; -layout(std140, binding = 0) uniform parameters { - uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ - uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ - vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */ - vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */ - vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */ - uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */ - vec4 output_min_coord; /* [m] Top left corner of output region */ - vec4 output_max_coord; /* [m] Bottom right corner of output region */ - uvec2 rf_raw_dim; /* Raw Data Dimensions */ - uint xdc_count; /* Number of Transducer Arrays (4 max) */ - uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - float speed_of_sound; /* [m/s] */ - float sampling_frequency; /* [Hz] */ - float center_frequency; /* [Hz] */ - float focal_depth; /* [m] */ - float time_offset; /* pulse length correction time [s] */ - float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */ - int beamform_plane; /* Plane to Beamform in 2D HERCULES */ -}; - layout(location = 0) uniform uint u_filter_order = 0; void main() diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl @@ -1,5 +1,4 @@ /* See LICENSE for license details. */ -#version 460 core layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; layout(std430, binding = 1) readonly restrict buffer buffer_1 { @@ -14,28 +13,6 @@ layout(std430, binding = 3) readonly restrict buffer buffer_3 { int hadamard[]; }; -layout(std140, binding = 0) uniform parameters { - uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ - uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ - vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */ - vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */ - vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */ - uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */ - vec4 output_min_coord; /* [m] Top left corner of output region */ - vec4 output_max_coord; /* [m] Bottom right corner of output region */ - uvec2 rf_raw_dim; /* Raw Data Dimensions */ - uint xdc_count; /* Number of Transducer Arrays (4 max) */ - uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - float speed_of_sound; /* [m/s] */ - float sampling_frequency; /* [Hz] */ - float center_frequency; /* [Hz] */ - float focal_depth; /* [m] */ - float time_offset; /* pulse length correction time [s] */ - float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */ - int beamform_plane; /* Plane to Beamform in 2D HERCULES */ -}; - void main() { /* NOTE: each invocation takes a time sample and a receive channel. diff --git a/shaders/hercules.glsl b/shaders/hercules.glsl @@ -1,33 +1,10 @@ /* See LICENSE for license details. */ -#version 460 core layout(local_size_x = 32, local_size_y = 1, local_size_z = 32) in; layout(std430, binding = 1) readonly restrict buffer buffer_1 { vec2 rf_data[]; }; -layout(std140, binding = 0) uniform parameters { - uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ - uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ - vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */ - vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */ - vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */ - uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */ - vec4 output_min_coord; /* [m] Top left corner of output region */ - vec4 output_max_coord; /* [m] Bottom right corner of output region */ - uvec2 rf_raw_dim; /* Raw Data Dimensions */ - uint xdc_count; /* Number of Transducer Arrays (4 max) */ - uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - float speed_of_sound; /* [m/s] */ - float sampling_frequency; /* [Hz] */ - float center_frequency; /* [Hz] */ - float focal_depth; /* [m] */ - float time_offset; /* pulse length correction time [s] */ - float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */ - int beamform_plane; /* Plane to Beamform in 2D HERCULES */ -}; - layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex; layout(location = 2) uniform int u_volume_export_pass; diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl @@ -1,33 +1,10 @@ /* See LICENSE for license details. */ -#version 460 core layout(local_size_x = 32, local_size_y = 1, local_size_z = 32) in; layout(std430, binding = 1) readonly restrict buffer buffer_1 { vec2 rf_data[]; }; -layout(std140, binding = 0) uniform parameters { - uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */ - uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */ - vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */ - vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */ - vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */ - uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ - uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */ - vec4 output_min_coord; /* [m] Top left corner of output region */ - vec4 output_max_coord; /* [m] Bottom right corner of output region */ - uvec2 rf_raw_dim; /* Raw Data Dimensions */ - uint xdc_count; /* Number of Transducer Arrays (4 max) */ - uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */ - float speed_of_sound; /* [m/s] */ - float sampling_frequency; /* [Hz] */ - float center_frequency; /* [Hz] */ - float focal_depth; /* [m] */ - float time_offset; /* pulse length correction time [s] */ - float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */ - int beamform_plane; /* Plane to Beamform in 2D HERCULES */ -}; - layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex; layout(location = 2) uniform int u_volume_export_pass; diff --git a/static.c b/static.c @@ -1,11 +1,14 @@ /* See LICENSE for license details. */ -static s8 compute_shader_paths[CS_LAST] = { - [CS_HADAMARD] = s8("shaders/hadamard.glsl"), - [CS_HERCULES] = s8("shaders/hercules.glsl"), - [CS_DEMOD] = s8("shaders/demod.glsl"), - [CS_MIN_MAX] = s8("shaders/min_max.glsl"), - [CS_SUM] = s8("shaders/sum.glsl"), - [CS_UFORCES] = s8("shaders/uforces.glsl"), +static struct { + s8 path; + b32 needs_header; +} compute_shaders[CS_LAST] = { + [CS_HADAMARD] = {.path = s8("shaders/hadamard.glsl"), .needs_header = 1}, + [CS_HERCULES] = {.path = s8("shaders/hercules.glsl"), .needs_header = 1}, + [CS_DEMOD] = {.path = s8("shaders/demod.glsl"), .needs_header = 1}, + [CS_MIN_MAX] = {.path = s8("shaders/min_max.glsl"), .needs_header = 0}, + [CS_SUM] = {.path = s8("shaders/sum.glsl"), .needs_header = 0}, + [CS_UFORCES] = {.path = s8("shaders/uforces.glsl"), .needs_header = 1}, }; #ifndef _DEBUG @@ -182,19 +185,28 @@ static void reload_shaders(BeamformerCtx *ctx, Arena a) { ComputeShaderCtx *csctx = &ctx->csctx; + s8 header_in_arena = push_s8(&a, g_compute_shader_header); for (u32 i = 0; i < ARRAY_COUNT(csctx->programs); i++) { - if (!compute_shader_paths[i].len) + if (!compute_shaders[i].path.len) continue; Arena tmp = a; - FileStats fs = os_get_file_stats((char *)compute_shader_paths[i].data); - s8 shader_text = os_read_file(&tmp, (char *)compute_shader_paths[i].data, fs.filesize); + FileStats fs = os_get_file_stats((char *)compute_shaders[i].path.data); + s8 shader_text = os_read_file(&tmp, (char *)compute_shaders[i].path.data, fs.filesize); if (shader_text.len == -1) { os_write_err_msg(s8("failed to read shader: ")); - os_write_err_msg(compute_shader_paths[i]); + os_write_err_msg(compute_shaders[i].path); os_write_err_msg(s8("\n")); + /* TODO: maybe we don't need to fail here */ os_fail(); } + /* NOTE: arena works as stack (since everything here is 1 byte aligned) */ + if (compute_shaders[i].needs_header) { + shader_text.data -= header_in_arena.len; + shader_text.len += header_in_arena.len; + ASSERT(shader_text.data == header_in_arena.data); + } + u32 shader_id = compile_shader(tmp, GL_COMPUTE_SHADER, shader_text); if (shader_id) { diff --git a/util.c b/util.c @@ -22,9 +22,24 @@ mem_clear(u8 *p, u8 c, size len) } static void +mem_copy(void *src, void *dest, size n) +{ + ASSERT(n >= 0); + u8 *s = src, *d = dest; +#if defined(__AVX512BW__) + /* TODO: aligned load/store and comparison */ + for (; n >= 64; n -= 64, s += 64, d += 64) + _mm512_storeu_epi8(d, _mm512_loadu_epi8(s)); +#endif + for (; n >= 16; n -= 16, s += 16, d += 16) + _mm_storeu_si128((__m128i *)d, _mm_loadu_si128((__m128i*)s)); + for (; n; n--) *d++ = *s++; +} + +static void mem_move(u8 *src, u8 *dest, size n) { - if (dest < src) while (n) { *dest++ = *src++; n--; } + if (dest < src) mem_copy(src, dest, n); else while (n) { n--; dest[n] = src[n]; } } @@ -193,6 +208,14 @@ s8alloc(Arena *a, size len) return (s8){ .data = alloc(a, u8, len), .len = len }; } +static s8 +push_s8(Arena *a, s8 str) +{ + s8 result = s8alloc(a, str.len); + mem_copy(str.data, result.data, result.len); + return result; +} + static b32 uv4_equal(uv4 a, uv4 b) {