Commit: d3576ea1a8951f6d2a601ca573563d1ecb5239f3
Parent: f22798d2a027f8a51d0187e0da7cd9451eed0cb9
Author: Randy Palamar
Date: Sat, 7 Sep 2024 16:51:32 -0600
raw volume output dumping
This version is limited due to the GL driver killing your process
if a single call takes too long. It will be fixed soon.
Diffstat:
7 files changed, 100 insertions(+), 23 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -31,8 +31,6 @@ alloc_output_image(BeamformerCtx *ctx)
//SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_ANISOTROPIC_8X);
//SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_TRILINEAR);
SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_BILINEAR);
-
- ctx->flags &= ~ALLOC_OUT_TEX;
}
static void
@@ -94,8 +92,6 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
glDeleteBuffers(1, &cs->hadamard_ssbo);
glCreateBuffers(1, &cs->hadamard_ssbo);
glNamedBufferStorage(cs->hadamard_ssbo, hadamard_elements * sizeof(i32), hadamard, 0);
-
- ctx->flags &= ~ALLOC_SSBOS;
}
static void
@@ -166,12 +162,30 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
break;
case CS_HERCULES:
case CS_UFORCES:
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
+
+ /* NOTE: Do a volume computation before doing the normal display path */
+ if (ctx->export_ctx.state & ES_START) {
+ /* TODO: for large data this must be split over multiple compute calls
+ * otherwise the GL driver will kill you */
+ ExportCtx *e = &ctx->export_ctx;
+ glActiveTexture(GL_TEXTURE0);
+ glBindTexture(GL_TEXTURE_3D, e->volume_texture);
+ glBindImageTexture(0, e->volume_texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R32F);
+ glUniform1i(e->volume_texture_id, 0);
+ glUniform1i(csctx->volume_export_pass_id, 1);
+ glDispatchCompute(ORONE(e->volume_dim.x / 32),
+ e->volume_dim.y,
+ ORONE(e->volume_dim.z / 32));
+ ctx->export_ctx.state = ES_DONE;
+ }
+
+ glUniform1i(csctx->volume_export_pass_id, 0);
glActiveTexture(GL_TEXTURE0 + ctx->out_texture_unit);
glBindTexture(GL_TEXTURE_3D, ctx->out_texture);
glBindImageTexture(ctx->out_texture_unit, ctx->out_texture, 0, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RG32F);
glUniform1i(csctx->out_data_tex_id, ctx->out_texture_unit);
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
glDispatchCompute(ORONE(ctx->out_data_dim.x / 32),
ctx->out_data_dim.y,
ORONE(ctx->out_data_dim.z / 32));
@@ -220,10 +234,10 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
/* NOTE: Check for and Load RF Data into GPU */
if (os_poll_pipe(ctx->data_pipe)) {
ComputeShaderCtx *cs = &ctx->csctx;
- if (!uv4_equal(cs->dec_data_dim, bp->dec_data_dim) || ctx->flags & ALLOC_SSBOS)
+ if (!uv4_equal(cs->dec_data_dim, bp->dec_data_dim))
alloc_shader_storage(ctx, arena);
- if (!uv4_equal(ctx->out_data_dim, bp->output_points) || ctx->flags & ALLOC_OUT_TEX)
+ if (!uv4_equal(ctx->out_data_dim, bp->output_points))
alloc_output_image(ctx);
cs->raw_data_index = (cs->raw_data_index + 1) % ARRAY_COUNT(cs->raw_data_fences);
@@ -263,6 +277,15 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
glNamedBufferSubData(ctx->csctx.shared_ubo, 0, sizeof(*bp), bp);
ctx->params->upload = 0;
}
+
+ if (ctx->export_ctx.state & ES_START) {
+ uv4 edim = ctx->export_ctx.volume_dim;
+ glDeleteTextures(1, &ctx->export_ctx.volume_texture);
+ glCreateTextures(GL_TEXTURE_3D, 1, &ctx->export_ctx.volume_texture);
+ glTextureStorage3D(ctx->export_ctx.volume_texture, 1, GL_R32F,
+ edim.x, edim.y, edim.z);
+ }
+
u32 stages = ctx->params->compute_stages_count;
for (u32 i = 0; i < stages; i++) {
do_compute_shader(ctx, ctx->params->compute_stages[i]);
@@ -274,6 +297,19 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
glDeleteSync(ctx->csctx.timer_fences[tidx]);
ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences);
+
+ if (ctx->export_ctx.state & ES_DONE) {
+ ExportCtx *e = &ctx->export_ctx;
+ uv4 dim = e->volume_dim;
+ size volume_out_size = dim.x * dim.y * dim.z * sizeof(f32);
+ e->volume_buf = os_alloc_arena(e->volume_buf, volume_out_size);
+ glGetTextureImage(e->volume_texture, 0, GL_RED, GL_FLOAT, volume_out_size,
+ e->volume_buf.beg);
+ s8 raw = {.len = volume_out_size, .data = e->volume_buf.beg};
+ if (!os_write_file("raw_volume.bin", raw))
+ TraceLog(LOG_WARNING, "failed to write output volume\n");
+ ctx->export_ctx.state = 0;
+ }
}
/* NOTE: draw output image texture using render fragment shader */
diff --git a/beamformer.h b/beamformer.h
@@ -49,8 +49,6 @@ typedef union {
enum program_flags {
RELOAD_SHADERS = 1 << 0,
- ALLOC_SSBOS = 1 << 1,
- ALLOC_OUT_TEX = 1 << 2,
GEN_MIPMAPS = 1 << 29,
DO_COMPUTE = 1 << 30,
};
@@ -181,6 +179,7 @@ typedef struct {
i32 out_data_tex_id;
i32 mip_view_tex_id;
i32 mips_level_id;
+ i32 volume_export_pass_id;
} ComputeShaderCtx;
typedef struct {
@@ -198,10 +197,12 @@ enum export_state {
};
typedef struct { /* state for the "Dump Raw Volume" export path */
- uv4 volume_dim;
- u32 rf_data_ssbo;
- u32 volume_texture;
- u32 state;
+ Arena volume_buf; /* CPU side buffer the volume texture is read back into before writing to disk */
+ uv4 volume_dim; /* x/y/z extent used to allocate the 3D texture and size the export dispatch */
+ u32 volume_texture; /* GL name of the R32F 3D texture; recreated when an export starts */
+ i32 volume_texture_id; /* uniform location of "u_out_volume_tex" looked up in reload_shaders() */
+ u32 output_ssbo; /* NOTE(review): not referenced by any code visible in this change - confirm intended use */
+ u32 state; /* enum export_state bits: set to ES_START by the UI, ES_DONE after the dispatch, 0 once written */
} ExportCtx;
typedef struct {
diff --git a/main.c b/main.c
@@ -117,6 +117,11 @@ reload_shaders(BeamformerCtx *ctx, Arena a)
glDeleteShader(shader_id);
}
+ ctx->export_ctx.volume_texture_id = glGetUniformLocation(csctx->programs[CS_HERCULES],
+ "u_out_volume_tex");
+ csctx->volume_export_pass_id = glGetUniformLocation(csctx->programs[CS_HERCULES],
+ "u_volume_export_pass");
+
csctx->out_data_tex_id = glGetUniformLocation(csctx->programs[CS_UFORCES], "u_out_data_tex");
csctx->mip_view_tex_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_view_tex");
csctx->mips_level_id = glGetUniformLocation(csctx->programs[CS_MIN_MAX], "u_mip_map");
@@ -168,8 +173,8 @@ main(void)
Arena temp_memory = os_alloc_arena((Arena){0}, 8 * MEGABYTE);
ctx.window_size = (uv2){.w = 1280, .h = 840};
- ctx.out_data_dim = (uv4){.x = 256, .y = 1024, .z = 1};
+ ctx.out_data_dim = (uv4){.x = 1, .y = 1, .z = 1};
ctx.export_ctx.volume_dim = (uv4){.x = 1, .y = 1, .z = 1};
SetConfigFlags(FLAG_VSYNC_HINT);
@@ -232,8 +237,6 @@ main(void)
reload_shaders(&ctx, temp_memory);
ctx.flags &= ~DO_COMPUTE;
- ctx.flags |= ALLOC_SSBOS|ALLOC_OUT_TEX;
-
while(!WindowShouldClose()) {
do_debug();
if (ctx.gl_vendor_id == GL_VENDOR_NVIDIA)
diff --git a/os_unix.c b/os_unix.c
@@ -61,6 +61,17 @@ os_read_file(Arena *a, char *fname, size fsize)
return ret;
}
+/* Writes the raw.len bytes at raw.data to fname, creating the file with mode 0600
+ * or truncating it if it already exists.
+ * Returns non-zero only when every byte reached the file; 0 if the file could not
+ * be opened or the write stopped early. */
+static b32
+os_write_file(char *fname, s8 raw)
+{
+	i32 fd = open(fname, O_WRONLY|O_TRUNC|O_CREAT, 0600);
+	if (fd < 0)
+		return 0;
+
+	/* NOTE: write(2) is allowed to transfer fewer bytes than requested (and a single
+	 * call is capped on Linux), so keep going until everything has been written or
+	 * the call stops making progress */
+	size total = 0;
+	while (total < raw.len) {
+		size wlen = write(fd, (char *)raw.data + total, raw.len - total);
+		if (wlen <= 0)
+			break;
+		total += wlen;
+	}
+	close(fd);
+	return total == raw.len;
+}
+
static os_file_stats
os_get_file_stats(char *fname)
{
diff --git a/os_win32.c b/os_win32.c
@@ -71,6 +71,24 @@ os_read_file(Arena *a, char *fname, size fsize)
return ret;
}
+/* Writes the raw.len bytes at raw.data to fname, creating the file or replacing an
+ * existing one (CREATE_ALWAYS).
+ * Returns non-zero only when every byte was written; 0 if the data is too large for
+ * a single WriteFile call, the file could not be created, or the write failed. */
+static b32
+os_write_file(char *fname, s8 raw)
+{
+	/* NOTE: WriteFile takes a 32 bit byte count so larger buffers are rejected up front */
+	if (raw.len > (size)U32_MAX) {
+		fputs("os_write_file: writing files > 4GB is not yet support on win32\n", stderr);
+		return 0;
+	}
+
+	HANDLE h = CreateFileA(fname, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0);
+	if (h == INVALID_HANDLE_VALUE)
+		return 0;
+
+	/* NOTE: wlen stays 0 if WriteFile fails; the explicit result check also covers
+	 * the case of a failed zero length write */
+	DWORD wlen = 0;
+	b32 ok = WriteFile(h, raw.data, (DWORD)raw.len, &wlen, 0) != 0;
+	CloseHandle(h);
+	return ok && wlen == raw.len;
+}
+
static os_file_stats
os_get_file_stats(char *fname)
{
diff --git a/shaders/2d_hercules.glsl b/shaders/2d_hercules.glsl
@@ -28,6 +28,9 @@ layout(std140, binding = 0) uniform parameters {
};
layout(rg32f, location = 1) uniform writeonly image3D u_out_data_tex;
+layout(r32f, location = 2) uniform writeonly image3D u_out_volume_tex;
+
+layout(location = 3) uniform int u_volume_export_pass;
#define C_SPLINE 0.5
@@ -67,7 +70,9 @@ void main()
{
vec3 voxel = vec3(gl_GlobalInvocationID.xyz);
ivec3 out_coord = ivec3(gl_GlobalInvocationID.xyz);
- ivec3 out_data_dim = imageSize(u_out_data_tex);
+ ivec3 out_data_dim;
+ if (u_volume_export_pass == 0) out_data_dim = imageSize(u_out_data_tex);
+ else out_data_dim = imageSize(u_out_volume_tex);
/* NOTE: Convert pixel to physical coordinates */
vec2 xdc_size = abs(xdc_max_xy - xdc_min_xy);
@@ -101,7 +106,7 @@ void main()
vec2 sum = vec2(0);
vec3 rdist = starting_dist;
- int direction = 1;
+ int direction = 1 * (u_volume_export_pass ^ 1);
uint ridx = 0;
/* NOTE: For Each Acquistion in Raw Data */
for (uint i = 0; i < dec_data_dim.z; i++) {
@@ -129,9 +134,10 @@ void main()
ridx += dec_data_dim.x;
}
- rdist[direction] = starting_dist[direction];
- rdist[(~direction) & 1] -= delta[(~direction) & 1];
+ rdist[direction] = starting_dist[direction];
+ rdist[direction ^ 1] -= delta[direction ^ 1];
}
float val = length(sum);
- imageStore(u_out_data_tex, out_coord, vec4(val, val, 0, 0));
+ if (u_volume_export_pass == 0) imageStore(u_out_data_tex, out_coord, vec4(val));
+ else imageStore(u_out_volume_tex, out_coord, vec4(val));
}
diff --git a/ui.c b/ui.c
@@ -526,8 +526,10 @@ draw_settings_ui(BeamformerCtx *ctx, Arena arena, Rect r, v2 mouse)
btn_r.size.h = ctx->font.baseSize * 1.3;
btn_r.size.w *= 0.6;
if (do_text_button(ctx, s8("Dump Raw Volume"), btn_r, mouse, hover_t + idx++)) {
- if (!ctx->export_ctx.state)
- ctx->export_ctx.state = ES_START;
+ if (!ctx->export_ctx.state) {
+ ctx->export_ctx.state = ES_START;
+ ctx->flags |= DO_COMPUTE;
+ }
}
/* NOTE: if C compilers didn't suck this would be a static assert */