Commit: ba2fb5d57eecb37a64c2790694cd6d166f35c612
Parent: dbfa80b7ab329fad1db7797dc9f178815e75b3f9
Author: Randy Palamar
Date: Tue, 1 Oct 2024 16:05:08 -0600
restructure and finish image averaging implementation
Now image averaging should work just fine with multiple arrays. A
bunch of useless uniform calls were dropped as well - images
should use binding points not uniform locations.
Diffstat:
11 files changed, 98 insertions(+), 77 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -14,23 +14,39 @@ static void
alloc_output_image(BeamformerCtx *ctx)
{
BeamformerParameters *bp = &ctx->params->raw;
+ ComputeShaderCtx *cs = &ctx->csctx;
+
ctx->out_data_dim.x = round_down_power_of_2(ORONE(bp->output_points.x));
ctx->out_data_dim.y = round_down_power_of_2(ORONE(bp->output_points.y));
ctx->out_data_dim.z = round_down_power_of_2(ORONE(bp->output_points.z));
+ ctx->out_data_dim.w = CLAMP(bp->output_points.w, 0, ARRAY_COUNT(cs->sum_textures));
bp->output_points = ctx->out_data_dim;
/* NOTE: allocate storage for beamformed output data;
* this is shared between compute and fragment shaders */
uv4 odim = ctx->out_data_dim;
u32 max_dim = MAX(odim.x, MAX(odim.y, odim.z));
- /* TODO: does this actually matter or is 0 fine? */
ctx->out_texture_unit = 0;
ctx->out_texture_mips = _tzcnt_u32(max_dim) + 1;
+
glActiveTexture(GL_TEXTURE0);
- glDeleteTextures(ARRAY_COUNT(ctx->out_textures), ctx->out_textures);
- glGenTextures(ARRAY_COUNT(ctx->out_textures), ctx->out_textures);
- for (u32 i = 0; i < ARRAY_COUNT(ctx->out_textures); i++) {
- glBindTexture(GL_TEXTURE_3D, ctx->out_textures[i]);
+ glDeleteTextures(1, &ctx->out_texture);
+ glGenTextures(1, &ctx->out_texture);
+ glBindTexture(GL_TEXTURE_3D, ctx->out_texture);
+ glTexStorage3D(GL_TEXTURE_3D, ctx->out_texture_mips, GL_RG32F, odim.x, odim.y, odim.z);
+
+ glDeleteTextures(ARRAY_COUNT(cs->sum_textures), cs->sum_textures);
+ glGenTextures(odim.w, cs->sum_textures);
+ for (u32 i = 0; i < odim.w; i++) {
+ glBindTexture(GL_TEXTURE_3D, cs->sum_textures[i]);
+ glTexStorage3D(GL_TEXTURE_3D, ctx->out_texture_mips, GL_RG32F, odim.x, odim.y, odim.z);
+ }
+
+ bp->array_count = CLAMP(bp->array_count, 1, ARRAY_COUNT(cs->array_textures));
+ glDeleteTextures(ARRAY_COUNT(cs->array_textures), cs->array_textures);
+ glGenTextures(bp->array_count, cs->array_textures);
+ for (u32 i = 0; i < bp->array_count; i++) {
+ glBindTexture(GL_TEXTURE_3D, cs->array_textures[i]);
glTexStorage3D(GL_TEXTURE_3D, ctx->out_texture_mips, GL_RG32F, odim.x, odim.y, odim.z);
}
@@ -146,7 +162,6 @@ do_volume_computation_step(BeamformerCtx *ctx, enum compute_shaders shader)
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_3D, e->volume_texture);
glBindImageTexture(0, e->volume_texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R32F);
- glUniform1i(cs->out_data_tex_id, 0);
glUniform1i(cs->volume_export_pass_id, 1);
/* NOTE: We must tile this otherwise GL will kill us for taking too long */
@@ -167,12 +182,25 @@ do_volume_computation_step(BeamformerCtx *ctx, enum compute_shaders shader)
}
static void
+do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, u32 out_texture, uv4 out_data_dim)
+{
+ glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F);
+ glUniform1f(cs->sum_prescale_id, 1 / (f32)in_texture_count);
+ for (u32 i = 0; i < in_texture_count; i++) {
+ glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
+ glDispatchCompute(ORONE(out_data_dim.x / 32),
+ ORONE(out_data_dim.y),
+ ORONE(out_data_dim.z / 32));
+ glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
+ }
+}
+
+static void
do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
{
ComputeShaderCtx *csctx = &ctx->csctx;
uv2 rf_raw_dim = ctx->params->raw.rf_raw_dim;
size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
- u32 sum_count = ARRAY_COUNT(ctx->out_textures); /* TODO: ctx->params->raw.frame_average_count */
glBeginQuery(GL_TIME_ELAPSED, csctx->timer_ids[csctx->timer_index][shader]);
@@ -212,16 +240,10 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
break;
case CS_MIN_MAX: {
- u32 texture = ctx->out_textures[ctx->out_texture_index];
- glBindImageTexture(ctx->out_texture_unit, texture, 0, GL_FALSE, 0,
- GL_WRITE_ONLY, GL_RG32F);
- glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit);
+ u32 texture = ctx->out_texture;
for (u32 i = 1; i < ctx->out_texture_mips; i++) {
- u32 otu = ctx->out_texture_unit;
- glBindImageTexture(otu + 1, texture, i - 1, GL_FALSE, 0, GL_READ_ONLY, GL_RG32F);
- glBindImageTexture(otu + 2, texture, i - 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RG32F);
- glUniform1i(csctx->out_data_tex_id, otu + 1);
- glUniform1i(csctx->mip_view_tex_id, otu + 2);
+ glBindImageTexture(0, texture, i - 1, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
+ glBindImageTexture(1, texture, i - 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
glUniform1i(csctx->mips_level_id, i);
u32 width = ctx->out_data_dim.x >> i;
@@ -232,7 +254,7 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
}
} break;
case CS_HERCULES:
- case CS_UFORCES:
+ case CS_UFORCES: {
if (ctx->export_ctx.state & ES_START) {
/* NOTE: on the first frame of compute make a copy of the rf data */
size rf_size = decoded_data_size(csctx);
@@ -242,15 +264,25 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
ctx->export_ctx.rf_data_ssbo, 0, 0, rf_size);
}
+ BeamformerParameters *bp = &ctx->params->raw;
+
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
glUniform3iv(csctx->volume_export_dim_offset_id, 1, (i32 []){0, 0, 0});
glUniform1i(csctx->volume_export_pass_id, 0);
glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit);
- for (u32 i = 0; i < ctx->params->raw.array_count; i++) {
- BeamformerParameters *bp = &ctx->params->raw;
- u32 tex_idx = (ctx->out_texture_index + i) % ARRAY_COUNT(ctx->out_textures);
- u32 texture = ctx->out_textures[tex_idx];
+ for (u32 i = 0; i < bp->array_count; i++) {
+ u32 texture;
+ if (bp->array_count == 1) {
+ if (ctx->out_data_dim.w > 0) {
+ texture = csctx->sum_textures[csctx->sum_texture_index];
+ } else {
+ texture = ctx->out_texture;
+ }
+ } else {
+ texture = csctx->array_textures[i];
+ }
+
m3 xdc_transform = observation_direction_to_xdc_space((v3){.z = 1},
bp->xdc_origin[i].xyz,
bp->xdc_corner1[i].xyz,
@@ -258,36 +290,27 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
glBindTexture(GL_TEXTURE_3D, texture);
glBindImageTexture(ctx->out_texture_unit, texture, 0, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RG32F);
- glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit);
glUniform1i(csctx->xdc_index_id, i);
glUniformMatrix3fv(csctx->xdc_transform_id, 1, GL_FALSE, xdc_transform.E);
glDispatchCompute(ORONE(ctx->out_data_dim.x / 32),
ctx->out_data_dim.y,
ORONE(ctx->out_data_dim.z / 32));
}
- if (ctx->params->raw.array_count == 1)
- break;
- /* TODO: we can't use this as a ring buffer yet since we need to keep a fixed
- * index for output display */
- //ctx->out_texture_index = (ctx->out_texture_index + ctx->params->raw.array_count)
- // % ARRAY_COUNT(ctx->out_textures);
- sum_count = ctx->params->raw.array_count;
- /* NOTE: FALLTHROUGH; sum implicit when beamforming mulitple arrays */
+ if (bp->array_count > 1) {
+ if (ctx->out_data_dim.w > 0)
+ do_sum_shader(csctx, csctx->array_textures, bp->array_count,
+ csctx->sum_textures[csctx->sum_texture_index],
+ ctx->out_data_dim);
+ else
+ do_sum_shader(csctx, csctx->array_textures, bp->array_count,
+ ctx->out_texture, ctx->out_data_dim);
+ }
+ } break;
case CS_SUM: {
- u32 otu = 0;
- /* TODO: extra "display" texture to always sum into */
- u32 out_texture = ctx->out_textures[ctx->out_texture_index];
- glBindImageTexture(otu, out_texture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_RG32F);
- glUniform1i(csctx->sum_out_img_id, otu);
- for (u32 i = ctx->out_texture_index + 1; i < sum_count; i++) {
- u32 tex_idx = i % ARRAY_COUNT(ctx->out_textures);
- u32 in_texture = ctx->out_textures[tex_idx];
- glBindImageTexture(otu + 1, in_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG32F);
- glUniform1i(csctx->sum_in_img_id, otu + 1);
- glDispatchCompute(ORONE(ctx->out_data_dim.x / 32),
- ctx->out_data_dim.y,
- ORONE(ctx->out_data_dim.z / 32));
- glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
+ u32 frame_count = ctx->out_data_dim.w;
+ if (frame_count) {
+ do_sum_shader(csctx, csctx->sum_textures, frame_count, ctx->out_texture, ctx->out_data_dim);
+ csctx->sum_texture_index = (csctx->sum_texture_index + 1) % frame_count;
}
} break;
default: ASSERT(0);
@@ -444,8 +467,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
FragmentShaderCtx *fs = &ctx->fsctx;
glUseProgram(fs->shader.id);
glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit);
- /* TODO: seperate display texture */
- glBindTexture(GL_TEXTURE_3D, ctx->out_textures[ctx->out_texture_index]);
+ glBindTexture(GL_TEXTURE_3D, ctx->out_texture);
glUniform1i(fs->out_data_tex_id, ctx->out_texture_unit);
glUniform1f(fs->db_cutoff_id, fs->db);
DrawTexture(fs->output.texture, 0, 0, WHITE);
diff --git a/beamformer.h b/beamformer.h
@@ -164,6 +164,14 @@ typedef struct {
GLsync timer_fences[MAX_FRAMES_IN_FLIGHT];
f32 last_frame_time[CS_LAST];
+ /* NOTE: circular buffer of textures for averaging.
+ * Only allocated up to configured frame average count */
+ u32 sum_textures[16];
+ u32 sum_texture_index;
+
+ /* NOTE: array output textures. Only allocated up to configured array count */
+ u32 array_textures[4];
+
/* NOTE: the raw_data_ssbo is allocated at 3x the required size to allow for tiled
* transfers when the GPU is running behind the CPU. It is not mapped on NVIDIA because
* their drivers _will_ store the buffer in the system memory. This doesn't happen
@@ -187,16 +195,13 @@ typedef struct {
uv4 dec_data_dim;
uv2 rf_raw_dim;
- i32 out_data_tex_id;
- i32 mip_view_tex_id;
i32 mips_level_id;
i32 volume_export_pass_id;
i32 volume_export_dim_offset_id;
i32 xdc_transform_id;
i32 xdc_index_id;
- i32 sum_out_img_id;
- i32 sum_in_img_id;
+ i32 sum_prescale_id;
} ComputeShaderCtx;
typedef struct {
@@ -238,10 +243,7 @@ typedef struct {
InputState is;
uv4 out_data_dim;
- /* NOTE: circular buffer of output textures; useful for multi array or averaging */
- /* TODO: support averaging for multi array imaging */
- u32 out_textures[4];
- u32 out_texture_index;
+ u32 out_texture;
u32 out_texture_unit;
u32 out_texture_mips;
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -21,7 +21,7 @@ typedef struct {
v4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */
v4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */
uv4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uv4 output_points; /* Width * Height * Depth; last element ignored */
+ uv4 output_points; /* Width * Height * Depth * (Frame Average Count) */
v4 output_min_coordinate; /* [m] Back-Top-Left corner of output region (w ignored) */
v4 output_max_coordinate; /* [m] Front-Bottom-Right corner of output region (w ignored)*/
uv2 rf_raw_dim; /* Raw Data Dimensions */
@@ -36,4 +36,5 @@ typedef struct {
u32 uforces; /* mode is UFORCES (1) or FORCES (0) */
f32 off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */
i32 beamform_plane; /* Plane to Beamform in 2D HERCULES */
+ f32 _pad[3];
} BeamformerParameters;
diff --git a/main.c b/main.c
@@ -127,12 +127,9 @@ reload_shaders(BeamformerCtx *ctx, Arena a)
csctx->xdc_index_id = glGetUniformLocation(csctx->programs[CS_UFORCES],
"u_xdc_index");
- csctx->out_data_tex_id = glGetUniformLocation(csctx->programs[CS_UFORCES], "u_out_data_tex");
- csctx->mip_view_tex_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_view_tex");
csctx->mips_level_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_map");
- csctx->sum_out_img_id = glGetUniformLocation(csctx->programs[CS_SUM], "u_sum_out_img");
- csctx->sum_in_img_id = glGetUniformLocation(csctx->programs[CS_SUM], "u_sum_in_img");
+ csctx->sum_prescale_id = glGetUniformLocation(csctx->programs[CS_SUM], "u_prescale");
Shader updated_fs = LoadShader(NULL, "shaders/render.glsl");
if (updated_fs.id != rlGetShaderIdDefault()) {
diff --git a/shaders/demod.glsl b/shaders/demod.glsl
@@ -18,7 +18,7 @@ layout(std140, binding = 0) uniform parameters {
vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */
vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */
uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uvec4 output_points; /* Width * Height * Depth; last element ignored */
+ uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */
vec4 output_min_coord; /* [m] Top left corner of output region */
vec4 output_max_coord; /* [m] Bottom right corner of output region */
uvec2 rf_raw_dim; /* Raw Data Dimensions */
diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl
@@ -22,7 +22,7 @@ layout(std140, binding = 0) uniform parameters {
vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */
vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */
uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uvec4 output_points; /* Width * Height * Depth; last element ignored */
+ uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */
vec4 output_min_coord; /* [m] Top left corner of output region */
vec4 output_max_coord; /* [m] Bottom right corner of output region */
uvec2 rf_raw_dim; /* Raw Data Dimensions */
diff --git a/shaders/hercules.glsl b/shaders/hercules.glsl
@@ -14,7 +14,7 @@ layout(std140, binding = 0) uniform parameters {
vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */
vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */
uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uvec4 output_points; /* Width * Height * Depth; last element ignored */
+ uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */
vec4 output_min_coord; /* [m] Top left corner of output region */
vec4 output_max_coord; /* [m] Bottom right corner of output region */
uvec2 rf_raw_dim; /* Raw Data Dimensions */
@@ -31,7 +31,7 @@ layout(std140, binding = 0) uniform parameters {
int beamform_plane; /* Plane to Beamform in 2D HERCULES */
};
-layout(rg32f, location = 1) uniform writeonly image3D u_out_data_tex;
+layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex;
layout(location = 2) uniform int u_volume_export_pass;
layout(location = 3) uniform ivec3 u_volume_export_dim_offset;
@@ -94,7 +94,7 @@ void main()
vec3 voxel = vec3(gl_GlobalInvocationID.xyz) + vec3(u_volume_export_dim_offset);
ivec3 out_coord = ivec3(gl_GlobalInvocationID.xyz) + u_volume_export_dim_offset;
- /* NOTE: Convert pixel to physical coordinates */
+ /* NOTE: Convert voxel to physical coordinates */
vec3 edge1 = xdc_corner1[u_xdc_index].xyz - xdc_origin[u_xdc_index].xyz;
vec3 edge2 = xdc_corner2[u_xdc_index].xyz - xdc_origin[u_xdc_index].xyz;
vec3 image_point = calc_image_point(voxel);
@@ -167,6 +167,5 @@ void main()
rdist[direction] = starting_point[direction];
rdist[direction ^ 1] -= delta[direction ^ 1];
}
- float val = length(sum);
- imageStore(u_out_data_tex, out_coord, vec4(val));
+ imageStore(u_out_data_tex, out_coord, vec4(length(sum)));
}
diff --git a/shaders/min_max.glsl b/shaders/min_max.glsl
@@ -5,9 +5,9 @@
#version 460 core
layout(local_size_x = 32, local_size_y = 1, local_size_z = 32) in;
-layout(rg32f, location = 1) uniform readonly image3D u_out_data_tex;
-layout(rg32f, location = 2) uniform writeonly image3D u_mip_view_tex;
-layout(location = 3) uniform int u_mip_map = 0;
+layout(rg32f, binding = 0) readonly uniform image3D u_out_data_tex;
+layout(rg32f, binding = 1) writeonly uniform image3D u_mip_view_tex;
+layout(location = 1) uniform int u_mip_map = 0;
void main()
{
diff --git a/shaders/render.glsl b/shaders/render.glsl
@@ -28,7 +28,7 @@ void main()
float smp = length(texelFetch(u_out_data_tex, smp_coord, 0).xy);
float absmax = max(abs(min_max.y), abs(min_max.x));
- smp = 20 * log(abs(smp) / absmax) / log(10);
+ smp = 20 * log(smp / absmax) / log(10);
smp = clamp(smp, u_db_cutoff, 0) / u_db_cutoff;
smp = 1 - smp;
diff --git a/shaders/sum.glsl b/shaders/sum.glsl
@@ -2,12 +2,13 @@
layout(local_size_x = 32, local_size_y = 1, local_size_z = 32) in;
-layout(rg32f, location = 1) uniform image3D u_out_img;
-layout(rg32f, location = 2) readonly uniform image3D u_in_img;
+layout(rg32f, binding = 0) uniform image3D u_out_img;
+layout(rg32f, binding = 1) readonly uniform image3D u_in_img;
+layout(location = 1) uniform float u_prescale = 1.0;
void main()
{
ivec3 voxel = ivec3(gl_GlobalInvocationID);
- vec4 sum = imageLoad(u_out_img, voxel) + imageLoad(u_in_img, voxel);
+ vec4 sum = imageLoad(u_out_img, voxel) + u_prescale * imageLoad(u_in_img, voxel);
imageStore(u_out_img, voxel, sum);
}
diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl
@@ -14,7 +14,7 @@ layout(std140, binding = 0) uniform parameters {
vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */
vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */
uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uvec4 output_points; /* Width * Height * Depth; last element ignored */
+ uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */
vec4 output_min_coord; /* [m] Top left corner of output region */
vec4 output_max_coord; /* [m] Bottom right corner of output region */
uvec2 rf_raw_dim; /* Raw Data Dimensions */
@@ -31,7 +31,7 @@ layout(std140, binding = 0) uniform parameters {
int beamform_plane; /* Plane to Beamform in 2D HERCULES */
};
-layout(rg32f, location = 1) writeonly uniform image3D u_out_data_tex;
+layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex;
layout(location = 2) uniform int u_volume_export_pass;
layout(location = 3) uniform ivec3 u_volume_export_dim_offset;
@@ -146,6 +146,5 @@ void main()
ridx += dec_data_dim.x;
}
}
- float val = length(sum);
- imageStore(u_out_data_tex, out_coord, vec4(val, val, 0, 0));
+ imageStore(u_out_data_tex, out_coord, vec4(length(sum)));
}