Commit: e19eef9f2ab1d314dd2cc5c028e5f8397e758d27
Parent: bc05b689c633829751fdd6ba20cfc6fa62de1828
Author: Randy Palamar
Date: Thu, 31 Oct 2024 10:01:58 -0600
move shader parameters ubo to static s8
editing this in so many places was getting error prone and tedious
Diffstat:
9 files changed, 77 insertions(+), 105 deletions(-)
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -35,3 +35,28 @@ typedef struct {
i32 beamform_plane; /* Plane to Beamform in 2D HERCULES */
f32 _pad[1];
} BeamformerParameters;
+
+static s8 g_compute_shader_header = s8("\
+#version 460 core\n\
+\n\
+layout(std140, binding = 0) uniform parameters {\n\
+ uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */\n\
+ uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */\n\
+ vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */\n\
+ vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */\n\
+ vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */\n\
+ uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */\n\
+ uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */\n\
+ vec4 output_min_coord; /* [m] Top left corner of output region */\n\
+ vec4 output_max_coord; /* [m] Bottom right corner of output region */\n\
+ uvec2 rf_raw_dim; /* Raw Data Dimensions */\n\
+ uint xdc_count; /* Number of Transducer Arrays (4 max) */\n\
+ uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */\n\
+ float speed_of_sound; /* [m/s] */\n\
+ float sampling_frequency; /* [Hz] */\n\
+ float center_frequency; /* [Hz] */\n\
+ float focal_depth; /* [m] */\n\
+ float time_offset; /* pulse length correction time [s] */\n\
+ float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */\n\
+ int beamform_plane; /* Plane to Beamform in 2D HERCULES */\n\
+};\n\n");
diff --git a/build.sh b/build.sh
@@ -4,7 +4,7 @@ set -e
cflags="-march=native -std=c11 -O3 -Wall -I./external/include"
#cflags="${cflags} -fproc-stat-report"
#cflags="${cflags} -Rpass-missed=.*"
-libcflags="$cflags -fPIC -shared"
+libcflags="$cflags -fPIC -shared -Wno-unused-variable"
ldflags="-lm"
debug=${DEBUG}
diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h
@@ -14,6 +14,10 @@ typedef float f32;
typedef double f64;
typedef ptrdiff_t size;
+#define ARRAY_COUNT(a) (sizeof(a) / sizeof(*a))
+typedef struct { size len; u8 *data; } s8;
+#define s8(s) (s8){.len = ARRAY_COUNT(s) - 1, .data = (u8 *)s}
+
#if defined(_WIN32)
#define LIB_FN __declspec(dllexport)
#else
diff --git a/shaders/demod.glsl b/shaders/demod.glsl
@@ -1,5 +1,4 @@
/* See LICENSE for license details. */
-#version 460 core
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
layout(std430, binding = 1) readonly restrict buffer buffer_1 {
@@ -14,28 +13,6 @@ layout(std430, binding = 3) readonly restrict buffer buffer_3 {
float filter_coefficients[];
};
-layout(std140, binding = 0) uniform parameters {
- uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */
- uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */
- vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */
- vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */
- vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */
- uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */
- vec4 output_min_coord; /* [m] Top left corner of output region */
- vec4 output_max_coord; /* [m] Bottom right corner of output region */
- uvec2 rf_raw_dim; /* Raw Data Dimensions */
- uint xdc_count; /* Number of Transducer Arrays (4 max) */
- uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
- float speed_of_sound; /* [m/s] */
- float sampling_frequency; /* [Hz] */
- float center_frequency; /* [Hz] */
- float focal_depth; /* [m] */
- float time_offset; /* pulse length correction time [s] */
- float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */
- int beamform_plane; /* Plane to Beamform in 2D HERCULES */
-};
-
layout(location = 0) uniform uint u_filter_order = 0;
void main()
diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl
@@ -1,5 +1,4 @@
/* See LICENSE for license details. */
-#version 460 core
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
layout(std430, binding = 1) readonly restrict buffer buffer_1 {
@@ -14,28 +13,6 @@ layout(std430, binding = 3) readonly restrict buffer buffer_3 {
int hadamard[];
};
-layout(std140, binding = 0) uniform parameters {
- uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */
- uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */
- vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */
- vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */
- vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */
- uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */
- vec4 output_min_coord; /* [m] Top left corner of output region */
- vec4 output_max_coord; /* [m] Bottom right corner of output region */
- uvec2 rf_raw_dim; /* Raw Data Dimensions */
- uint xdc_count; /* Number of Transducer Arrays (4 max) */
- uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
- float speed_of_sound; /* [m/s] */
- float sampling_frequency; /* [Hz] */
- float center_frequency; /* [Hz] */
- float focal_depth; /* [m] */
- float time_offset; /* pulse length correction time [s] */
- float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */
- int beamform_plane; /* Plane to Beamform in 2D HERCULES */
-};
-
void main()
{
/* NOTE: each invocation takes a time sample and a receive channel.
diff --git a/shaders/hercules.glsl b/shaders/hercules.glsl
@@ -1,33 +1,10 @@
/* See LICENSE for license details. */
-#version 460 core
layout(local_size_x = 32, local_size_y = 1, local_size_z = 32) in;
layout(std430, binding = 1) readonly restrict buffer buffer_1 {
vec2 rf_data[];
};
-layout(std140, binding = 0) uniform parameters {
- uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */
- uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */
- vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */
- vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */
- vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */
- uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */
- vec4 output_min_coord; /* [m] Top left corner of output region */
- vec4 output_max_coord; /* [m] Bottom right corner of output region */
- uvec2 rf_raw_dim; /* Raw Data Dimensions */
- uint xdc_count; /* Number of Transducer Arrays (4 max) */
- uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
- float speed_of_sound; /* [m/s] */
- float sampling_frequency; /* [Hz] */
- float center_frequency; /* [Hz] */
- float focal_depth; /* [m] */
- float time_offset; /* pulse length correction time [s] */
- float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */
- int beamform_plane; /* Plane to Beamform in 2D HERCULES */
-};
-
layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex;
layout(location = 2) uniform int u_volume_export_pass;
diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl
@@ -1,33 +1,10 @@
/* See LICENSE for license details. */
-#version 460 core
layout(local_size_x = 32, local_size_y = 1, local_size_z = 32) in;
layout(std430, binding = 1) readonly restrict buffer buffer_1 {
vec2 rf_data[];
};
-layout(std140, binding = 0) uniform parameters {
- uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */
- uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */
- vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */
- vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */
- vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */
- uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */
- vec4 output_min_coord; /* [m] Top left corner of output region */
- vec4 output_max_coord; /* [m] Bottom right corner of output region */
- uvec2 rf_raw_dim; /* Raw Data Dimensions */
- uint xdc_count; /* Number of Transducer Arrays (4 max) */
- uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
- float speed_of_sound; /* [m/s] */
- float sampling_frequency; /* [Hz] */
- float center_frequency; /* [Hz] */
- float focal_depth; /* [m] */
- float time_offset; /* pulse length correction time [s] */
- float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */
- int beamform_plane; /* Plane to Beamform in 2D HERCULES */
-};
-
layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex;
layout(location = 2) uniform int u_volume_export_pass;
diff --git a/static.c b/static.c
@@ -1,11 +1,14 @@
/* See LICENSE for license details. */
-static s8 compute_shader_paths[CS_LAST] = {
- [CS_HADAMARD] = s8("shaders/hadamard.glsl"),
- [CS_HERCULES] = s8("shaders/hercules.glsl"),
- [CS_DEMOD] = s8("shaders/demod.glsl"),
- [CS_MIN_MAX] = s8("shaders/min_max.glsl"),
- [CS_SUM] = s8("shaders/sum.glsl"),
- [CS_UFORCES] = s8("shaders/uforces.glsl"),
+static struct {
+ s8 path;
+ b32 needs_header;
+} compute_shaders[CS_LAST] = {
+ [CS_HADAMARD] = {.path = s8("shaders/hadamard.glsl"), .needs_header = 1},
+ [CS_HERCULES] = {.path = s8("shaders/hercules.glsl"), .needs_header = 1},
+ [CS_DEMOD] = {.path = s8("shaders/demod.glsl"), .needs_header = 1},
+ [CS_MIN_MAX] = {.path = s8("shaders/min_max.glsl"), .needs_header = 0},
+ [CS_SUM] = {.path = s8("shaders/sum.glsl"), .needs_header = 0},
+ [CS_UFORCES] = {.path = s8("shaders/uforces.glsl"), .needs_header = 1},
};
#ifndef _DEBUG
@@ -182,19 +185,28 @@ static void
reload_shaders(BeamformerCtx *ctx, Arena a)
{
ComputeShaderCtx *csctx = &ctx->csctx;
+ s8 header_in_arena = push_s8(&a, g_compute_shader_header);
for (u32 i = 0; i < ARRAY_COUNT(csctx->programs); i++) {
- if (!compute_shader_paths[i].len)
+ if (!compute_shaders[i].path.len)
continue;
Arena tmp = a;
- FileStats fs = os_get_file_stats((char *)compute_shader_paths[i].data);
- s8 shader_text = os_read_file(&tmp, (char *)compute_shader_paths[i].data, fs.filesize);
+ FileStats fs = os_get_file_stats((char *)compute_shaders[i].path.data);
+ s8 shader_text = os_read_file(&tmp, (char *)compute_shaders[i].path.data, fs.filesize);
if (shader_text.len == -1) {
os_write_err_msg(s8("failed to read shader: "));
- os_write_err_msg(compute_shader_paths[i]);
+ os_write_err_msg(compute_shaders[i].path);
os_write_err_msg(s8("\n"));
+ /* TODO: maybe we don't need to fail here */
os_fail();
}
+ /* NOTE: arena works as stack (since everything here is 1 byte aligned) */
+ if (compute_shaders[i].needs_header) {
+ shader_text.data -= header_in_arena.len;
+ shader_text.len += header_in_arena.len;
+ ASSERT(shader_text.data == header_in_arena.data);
+ }
+
u32 shader_id = compile_shader(tmp, GL_COMPUTE_SHADER, shader_text);
if (shader_id) {
diff --git a/util.c b/util.c
@@ -22,9 +22,24 @@ mem_clear(u8 *p, u8 c, size len)
}
static void
+mem_copy(void *src, void *dest, size n)
+{
+ ASSERT(n >= 0);
+ u8 *s = src, *d = dest;
+#if defined(__AVX512BW__)
+ /* TODO: aligned load/store and comparison */
+ for (; n >= 64; n -= 64, s += 64, d += 64)
+ _mm512_storeu_epi8(d, _mm512_loadu_epi8(s));
+#endif
+ for (; n >= 16; n -= 16, s += 16, d += 16)
+ _mm_storeu_si128((__m128i *)d, _mm_loadu_si128((__m128i*)s));
+ for (; n; n--) *d++ = *s++;
+}
+
+static void
mem_move(u8 *src, u8 *dest, size n)
{
- if (dest < src) while (n) { *dest++ = *src++; n--; }
+ if (dest < src) mem_copy(src, dest, n);
else while (n) { n--; dest[n] = src[n]; }
}
@@ -193,6 +208,14 @@ s8alloc(Arena *a, size len)
return (s8){ .data = alloc(a, u8, len), .len = len };
}
+static s8
+push_s8(Arena *a, s8 str)
+{
+ s8 result = s8alloc(a, str.len);
+ mem_copy(str.data, result.data, result.len);
+ return result;
+}
+
static b32
uv4_equal(uv4 a, uv4 b)
{