Commit: 67bd1deee3a5fa0454f0ac8d6361492c2fa9daea
Parent: 59f09bb2efea630c3fea3d4a8d246d6dcec92b72
Author: Randy Palamar
Date: Fri, 8 Nov 2024 14:58:30 -0700
restructure beamform submissions into work queue
This is moving the program towards a state where we can export
arbitrary beamformed frames.
Diffstat:
8 files changed, 486 insertions(+), 308 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -3,8 +3,6 @@
static f32 dt_for_frame;
-#include "ui.c"
-
static size
decoded_data_size(ComputeShaderCtx *cs)
{
@@ -14,63 +12,57 @@ decoded_data_size(ComputeShaderCtx *cs)
}
static void
-alloc_output_image(BeamformerCtx *ctx, Arena a)
+alloc_beamform_frame(GLParams *gp, BeamformFrame *out, uv4 out_dim, u32 frame_index, s8 name)
{
- BeamformerParameters *bp = &ctx->params->raw;
- ComputeShaderCtx *cs = &ctx->csctx;
+ glDeleteTextures(out->dim.w, out->textures);
- u32 max_3d_dim = ctx->gl.max_3d_texture_dim;
- ctx->out_data_dim.x = CLAMP(round_down_power_of_2(ORONE(bp->output_points.x)), 1, max_3d_dim);
- ctx->out_data_dim.y = CLAMP(round_down_power_of_2(ORONE(bp->output_points.y)), 1, max_3d_dim);
- ctx->out_data_dim.z = CLAMP(round_down_power_of_2(ORONE(bp->output_points.z)), 1, max_3d_dim);
- ctx->out_data_dim.w = CLAMP(bp->output_points.w, 1, ARRAY_COUNT(cs->sum_textures));
- bp->output_points = ctx->out_data_dim;
+ out->dim.x = CLAMP(round_down_power_of_2(ORONE(out_dim.x)), 1, gp->max_3d_texture_dim);
+ out->dim.y = CLAMP(round_down_power_of_2(ORONE(out_dim.y)), 1, gp->max_3d_texture_dim);
+ out->dim.z = CLAMP(round_down_power_of_2(ORONE(out_dim.z)), 1, gp->max_3d_texture_dim);
+ out->dim.w = CLAMP(out_dim.w, 0, MAX_MULTI_XDC_COUNT);
/* NOTE: allocate storage for beamformed output data;
* this is shared between compute and fragment shaders */
- uv4 odim = ctx->out_data_dim;
- u32 max_dim = MAX(odim.x, MAX(odim.y, odim.z));
- ctx->out_texture_mips = _tzcnt_u32(max_dim) + 1;
-
- glActiveTexture(GL_TEXTURE0);
- glDeleteTextures(1, &ctx->out_texture);
- glGenTextures(1, &ctx->out_texture);
- glBindTexture(GL_TEXTURE_3D, ctx->out_texture);
- glTexStorage3D(GL_TEXTURE_3D, ctx->out_texture_mips, GL_RG32F, odim.x, odim.y, odim.z);
- LABEL_GL_OBJECT(GL_TEXTURE, ctx->out_texture, s8("Beamformed_Data_Texture"));
-
- Stream label = stream_alloc(&a, 256);
- stream_append_s8(&label, s8("Sum_Texture_"));
+ u32 max_dim = MAX(out->dim.x, MAX(out->dim.y, out->dim.z));
+ out->mips = _tzcnt_u32(max_dim) + 1;
+
+ u8 buf[256];
+ Stream label = {.data = buf, .cap = ARRAY_COUNT(buf)};
+ stream_append_s8(&label, name);
+ stream_append_byte(&label, '[');
+ stream_append_u64(&label, frame_index);
+ stream_append_s8(&label, s8("]["));
u32 sidx = label.widx;
- glDeleteTextures(ARRAY_COUNT(cs->sum_textures), cs->sum_textures);
- if (odim.w > 1) {
- glGenTextures(odim.w, cs->sum_textures);
- for (u32 i = 0; i < odim.w; i++) {
- glBindTexture(GL_TEXTURE_3D, cs->sum_textures[i]);
- glTexStorage3D(GL_TEXTURE_3D, ctx->out_texture_mips, GL_RG32F, odim.x, odim.y, odim.z);
- stream_append_u64(&label, i);
- s8 slabel = stream_to_s8(&label);
- LABEL_GL_OBJECT(GL_TEXTURE, cs->sum_textures[i], slabel);
- label.widx = sidx;
- }
- }
- bp->xdc_count = CLAMP(bp->xdc_count, 1, ARRAY_COUNT(cs->array_textures));
- glDeleteTextures(ARRAY_COUNT(cs->array_textures), cs->array_textures);
- glGenTextures(bp->xdc_count, cs->array_textures);
- for (u32 i = 0; i < bp->xdc_count; i++) {
- glBindTexture(GL_TEXTURE_3D, cs->array_textures[i]);
- glTexStorage3D(GL_TEXTURE_3D, ctx->out_texture_mips, GL_RG32F, odim.x, odim.y, odim.z);
+ glCreateTextures(GL_TEXTURE_3D, out->dim.w, out->textures);
+ for (u32 i = 0; i < out->dim.w; i++) {
+ glTextureStorage3D(out->textures[i], out->mips, GL_RG32F,
+ out->dim.x, out->dim.y, out->dim.z);
+ stream_append_u64(&label, i);
+ stream_append_byte(&label, ']');
+ LABEL_GL_OBJECT(GL_TEXTURE, out->textures[i], stream_to_s8(&label));
+ label.widx = sidx;
}
+}
- UnloadRenderTexture(ctx->fsctx.output);
- /* TODO: select odim.x vs odim.y */
- ctx->fsctx.output = LoadRenderTexture(odim.x, odim.z);
- LABEL_GL_OBJECT(GL_FRAMEBUFFER, ctx->fsctx.output.id, s8("Rendered_View"));
- GenTextureMipmaps(&ctx->fsctx.output.texture);
- //SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_ANISOTROPIC_8X);
- //SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_TRILINEAR);
- SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_BILINEAR);
+static void
+alloc_output_image(BeamformerCtx *ctx, uv4 output_dim)
+{
+ uv4 try_dim = {.xyz = output_dim.xyz};
+ if (!uv4_equal(try_dim, ctx->averaged_frame.dim)) {
+ alloc_beamform_frame(&ctx->gl, &ctx->averaged_frame, try_dim, 0,
+ s8("Beamformed_Averaged_Data"));
+ uv4 odim = ctx->averaged_frame.dim;
+
+ UnloadRenderTexture(ctx->fsctx.output);
+ /* TODO: select odim.x vs odim.y */
+ ctx->fsctx.output = LoadRenderTexture(odim.x, odim.z);
+ LABEL_GL_OBJECT(GL_FRAMEBUFFER, ctx->fsctx.output.id, s8("Rendered_View"));
+ GenTextureMipmaps(&ctx->fsctx.output.texture);
+ //SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_ANISOTROPIC_8X);
+ //SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_TRILINEAR);
+ SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_BILINEAR);
+ }
}
static void
@@ -145,6 +137,95 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
LABEL_GL_OBJECT(GL_BUFFER, cs->hadamard_ssbo, s8("Hadamard_SSBO"));
}
+static BeamformWork *
+beamform_work_queue_pop(BeamformWorkQueue *q)
+{
+ BeamformWork *result = q->first;
+ if (result) {
+ q->first = result->next;
+ if (result == q->last) {
+ ASSERT(result->next == 0);
+ q->last = 0;
+ }
+
+ switch (result->type) {
+ case BW_FULL_COMPUTE:
+ case BW_RECOMPUTE:
+ case BW_PARTIAL_COMPUTE:
+ /* NOTE: only one compute is allowed per frame */
+ if (q->did_compute_this_frame)
+ result = 0;
+ else
+ q->did_compute_this_frame = 1;
+ break;
+ }
+ }
+ return result;
+}
+
+static BeamformWork *
+beamform_work_queue_push(BeamformerCtx *ctx, Arena *a, enum beamform_work work_type)
+{
+ BeamformWorkQueue *q = &ctx->beamform_work_queue;
+ ComputeShaderCtx *cs = &ctx->csctx;
+
+ BeamformWork *result = q->next_free;
+ if (result) q->next_free = result->next;
+ else result = alloc(a, typeof(*result), 1);
+
+ if (result) {
+ result->type = work_type;
+ result->next = 0;
+
+ switch (work_type) {
+ case BW_FULL_COMPUTE:
+ /* TODO: limiting to make sure we don't have too many of these in the queue */
+ cs->raw_data_index++;
+ if (cs->raw_data_index >= ARRAY_COUNT(cs->raw_data_fences))
+ cs->raw_data_index = 0;
+ /* FALLTHROUGH */
+ case BW_RECOMPUTE: {
+ i32 raw_index = cs->raw_data_index;
+ result->compute_ctx.raw_data_ssbo_index = raw_index;
+ /* NOTE: if this times out it means the command queue is more than 3
+ * frames behind. In that case we need to re-evaluate the buffer size */
+ if (cs->raw_data_fences[raw_index]) {
+ i32 result = glClientWaitSync(cs->raw_data_fences[raw_index], 0,
+ 10000);
+ if (result == GL_TIMEOUT_EXPIRED) {
+ //ASSERT(0);
+ }
+ glDeleteSync(cs->raw_data_fences[raw_index]);
+ cs->raw_data_fences[raw_index] = NULL;
+ }
+ ctx->displayed_frame_index++;
+ if (ctx->displayed_frame_index >= ARRAY_COUNT(ctx->beamform_frames))
+ ctx->displayed_frame_index = 0;
+ result->compute_ctx.frame = ctx->beamform_frames + ctx->displayed_frame_index;
+ result->compute_ctx.first_pass = 1;
+
+ uv4 try_dim = ctx->params->raw.output_points;
+ try_dim.w = ctx->params->raw.xdc_count;
+ if (!uv4_equal(result->compute_ctx.frame->dim, try_dim)) {
+ alloc_beamform_frame(&ctx->gl, result->compute_ctx.frame, try_dim,
+ ctx->displayed_frame_index,
+ s8("Beamformed_Data"));
+ }
+ } break;
+ case BW_PARTIAL_COMPUTE:
+ case BW_SAVE_FRAME:
+ case BW_SEND_FRAME:
+ case BW_SSBO_COPY:
+ break;
+ }
+
+ if (q->last) q->last = q->last->next = result;
+ else q->last = q->first = result;
+ }
+
+ return result;
+}
+
static m3
v3_to_xdc_space(v3 direction, v3 origin, v3 corner1, v3 corner2)
{
@@ -173,65 +254,84 @@ f32_4_to_v4(f32 *in)
return result;
}
+static void
+do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, f32 in_scale,
+ u32 out_texture, uv4 out_data_dim)
+{
+ /* NOTE: zero output before summing */
+ glClearTexImage(out_texture, 0, GL_RED, GL_FLOAT, 0);
+
+ glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F);
+ glUniform1f(cs->sum_prescale_id, in_scale);
+ for (u32 i = 0; i < in_texture_count; i++) {
+ glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
+ glDispatchCompute(ORONE(out_data_dim.x / 32),
+ ORONE(out_data_dim.y),
+ ORONE(out_data_dim.z / 32));
+ glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
+ }
+}
+
+static void
+do_beamform_shader(ComputeShaderCtx *cs, BeamformerParameters *bp, BeamformFrame *frame,
+ u32 rf_ssbo, iv3 compute_dim_offset, i32 compute_pass)
+{
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, rf_ssbo);
+ glUniform3iv(cs->volume_export_dim_offset_id, 1, compute_dim_offset.E);
+ glUniform1i(cs->volume_export_pass_id, compute_pass);
+
+ for (u32 i = 0; i < frame->dim.w; i++) {
+ u32 texture = frame->textures[i];
+ m3 xdc_transform = v3_to_xdc_space((v3){.z = 1},
+ f32_4_to_v4(bp->xdc_origin + (4 * i)).xyz,
+ f32_4_to_v4(bp->xdc_corner1 + (4 * i)).xyz,
+ f32_4_to_v4(bp->xdc_corner2 + (4 * i)).xyz);
+ glBindImageTexture(0, texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
+ glUniform1i(cs->xdc_index_id, i);
+ glUniformMatrix3fv(cs->xdc_transform_id, 1, GL_FALSE, xdc_transform.E);
+ glDispatchCompute(ORONE(frame->dim.x / 32), frame->dim.y,
+ ORONE(frame->dim.z / 32));
+ }
+}
+
static b32
-do_volume_computation_step(BeamformerCtx *ctx, enum compute_shaders shader)
+do_partial_compute_step(BeamformerCtx *ctx, BeamformFrame *frame)
{
- ComputeShaderCtx *cs = &ctx->csctx;
- ExportCtx *e = &ctx->export_ctx;
+ ComputeShaderCtx *cs = &ctx->csctx;
+ PartialComputeCtx *pc = &ctx->partial_compute_ctx;
b32 done = 0;
/* NOTE: we start this elsewhere on the first dispatch so that we can include
* times such as decoding/demodulation/etc. */
- if (!(e->state & ES_TIMER_ACTIVE)) {
- glQueryCounter(e->timer_ids[0], GL_TIMESTAMP);
- e->state |= ES_TIMER_ACTIVE;
+ if (!(pc->state & PCS_TIMER_ACTIVE)) {
+ glQueryCounter(pc->timer_ids[0], GL_TIMESTAMP);
+ pc->state |= PCS_TIMER_ACTIVE;
}
- glUseProgram(cs->programs[shader]);
+ glUseProgram(cs->programs[pc->shader]);
glBindBufferBase(GL_UNIFORM_BUFFER, 0, cs->shared_ubo);
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, e->rf_data_ssbo);
-
- glBindImageTexture(0, e->volume_texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R32F);
- glUniform1i(cs->volume_export_pass_id, 1);
/* NOTE: We must tile this otherwise GL will kill us for taking too long */
/* TODO: this could be based on multiple dimensions */
- u32 dispatch_count = e->volume_dim.z / 32;
- uv4 dim_offset = {.z = !!dispatch_count * 32 * e->dispatch_index++};
- glUniform3iv(cs->volume_export_dim_offset_id, 1, (i32 *)dim_offset.E);
- glDispatchCompute(ORONE(e->volume_dim.x / 32), e->volume_dim.y, 1);
- if (e->dispatch_index >= dispatch_count) {
- e->dispatch_index = 0;
- e->state &= ~ES_COMPUTING;
- done = 1;
+ i32 dispatch_count = frame->dim.z / 32;
+ iv3 dim_offset = {.z = !!dispatch_count * 32 * pc->dispatch_index++};
+ do_beamform_shader(cs, &ctx->params->raw, frame, pc->rf_data_ssbo, dim_offset, 1);
+
+ if (pc->dispatch_index >= dispatch_count) {
+ pc->dispatch_index = 0;
+ pc->state &= ~PCS_COMPUTING;
+ done = 1;
}
- glQueryCounter(e->timer_ids[1], GL_TIMESTAMP);
+ glQueryCounter(pc->timer_ids[1], GL_TIMESTAMP);
return done;
}
static void
-do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, f32 in_scale,
- u32 out_texture, uv4 out_data_dim)
-{
- /* NOTE: zero output before summing */
- glClearTexImage(out_texture, 0, GL_RED, GL_FLOAT, 0);
-
- glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F);
- glUniform1f(cs->sum_prescale_id, in_scale);
- for (u32 i = 0; i < in_texture_count; i++) {
- glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
- glDispatchCompute(ORONE(out_data_dim.x / 32),
- ORONE(out_data_dim.y),
- ORONE(out_data_dim.z / 32));
- glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
- }
-}
-
-static void
-do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
+do_compute_shader(BeamformerCtx *ctx, BeamformFrame *frame, u32 raw_data_index,
+ enum compute_shaders shader)
{
ComputeShaderCtx *csctx = &ctx->csctx;
uv2 rf_raw_dim = ctx->params->raw.rf_raw_dim;
@@ -244,22 +344,23 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
u32 output_ssbo_idx = !csctx->last_output_ssbo_index;
u32 input_ssbo_idx = csctx->last_output_ssbo_index;
+
switch (shader) {
case CS_HADAMARD:
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo,
- csctx->raw_data_index * rf_raw_size, rf_raw_size);
+ raw_data_index * rf_raw_size, rf_raw_size);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->hadamard_ssbo);
glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
ORONE(csctx->dec_data_dim.y / 32),
ORONE(csctx->dec_data_dim.z));
- csctx->raw_data_fences[csctx->raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ csctx->raw_data_fences[raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
break;
case CS_CUDA_DECODE:
- ctx->cuda_lib.cuda_decode(csctx->raw_data_index * rf_raw_size, output_ssbo_idx);
- csctx->raw_data_fences[csctx->raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ ctx->cuda_lib.cuda_decode(raw_data_index * rf_raw_size, output_ssbo_idx);
+ csctx->raw_data_fences[raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
break;
case CS_CUDA_HILBERT:
@@ -275,76 +376,42 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
break;
case CS_MIN_MAX: {
- u32 texture = ctx->out_texture;
- for (u32 i = 1; i < ctx->out_texture_mips; i++) {
+ u32 texture = frame->textures[frame->dim.w - 1];
+ for (u32 i = 1; i < frame->mips; i++) {
glBindImageTexture(0, texture, i - 1, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
glBindImageTexture(1, texture, i - 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
glUniform1i(csctx->mips_level_id, i);
- u32 width = ctx->out_data_dim.x >> i;
- u32 height = ctx->out_data_dim.y >> i;
- u32 depth = ctx->out_data_dim.z >> i;
+ u32 width = frame->dim.x >> i;
+ u32 height = frame->dim.y >> i;
+ u32 depth = frame->dim.z >> i;
glDispatchCompute(ORONE(width / 32), ORONE(height), ORONE(depth / 32));
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
} break;
case CS_HERCULES:
case CS_UFORCES: {
- if (ctx->export_ctx.state & ES_START) {
- /* NOTE: on the first frame of compute make a copy of the rf data */
- size rf_size = decoded_data_size(csctx);
- ctx->export_ctx.state &= ~ES_START;
- ctx->export_ctx.state |= ES_COMPUTING;
- glCopyNamedBufferSubData(csctx->rf_data_ssbos[input_ssbo_idx],
- ctx->export_ctx.rf_data_ssbo, 0, 0, rf_size);
- }
-
- BeamformerParameters *bp = &ctx->params->raw;
-
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
- glUniform3iv(csctx->volume_export_dim_offset_id, 1, (i32 []){0, 0, 0});
- glUniform1i(csctx->volume_export_pass_id, 0);
-
- for (u32 i = 0; i < bp->xdc_count; i++) {
- u32 texture;
- if (bp->xdc_count == 1) {
- if (ctx->out_data_dim.w > 1) {
- texture = csctx->sum_textures[csctx->sum_texture_index];
- } else {
- texture = ctx->out_texture;
- }
- } else {
- texture = csctx->array_textures[i];
- }
-
- m3 xdc_transform = v3_to_xdc_space((v3){.z = 1},
- f32_4_to_v4(bp->xdc_origin + (4 * i)).xyz,
- f32_4_to_v4(bp->xdc_corner1 + (4 * i)).xyz,
- f32_4_to_v4(bp->xdc_corner2 + (4 * i)).xyz);
- glBindImageTexture(0, texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
- glUniform1i(csctx->xdc_index_id, i);
- glUniformMatrix3fv(csctx->xdc_transform_id, 1, GL_FALSE, xdc_transform.E);
- glDispatchCompute(ORONE(ctx->out_data_dim.x / 32),
- ctx->out_data_dim.y,
- ORONE(ctx->out_data_dim.z / 32));
- }
- if (bp->xdc_count > 1) {
+ u32 rf_ssbo = csctx->rf_data_ssbos[input_ssbo_idx];
+ do_beamform_shader(csctx, &ctx->params->raw, frame, rf_ssbo, (iv3){0}, 0);
+ if (frame->dim.w > 1) {
glUseProgram(csctx->programs[CS_SUM]);
glBindBufferBase(GL_UNIFORM_BUFFER, 0, csctx->shared_ubo);
- u32 out;
- if (ctx->out_data_dim.w > 1) out = csctx->sum_textures[csctx->sum_texture_index];
- else out = ctx->out_texture;
- do_sum_shader(csctx, csctx->array_textures, bp->xdc_count,
- 1 / (f32)bp->xdc_count, out, ctx->out_data_dim);
+ u32 input_texture_count = frame->dim.w - 1;
+ do_sum_shader(csctx, frame->textures, input_texture_count,
+ 1 / (f32)input_texture_count, frame->textures[frame->dim.w - 1],
+ frame->dim);
}
} break;
case CS_SUM: {
- u32 frame_count = ctx->out_data_dim.w;
- if (frame_count > 1) {
- do_sum_shader(csctx, csctx->sum_textures, frame_count, 1 / (f32)frame_count,
- ctx->out_texture, ctx->out_data_dim);
- csctx->sum_texture_index = (csctx->sum_texture_index + 1) % frame_count;
+ u32 frame_count = ctx->params->raw.output_points.w;
+ u32 in_textures[MAX_BEAMFORMED_SAVED_FRAMES];
+ for (u32 i = 0; i < frame_count; i++) {
+ u32 idx = (ctx->displayed_frame_index - i) % ARRAY_COUNT(ctx->beamform_frames);
+ BeamformFrame *frame = ctx->beamform_frames + idx;
+ in_textures[i] = frame->textures[frame->dim.w - 1];
}
+ do_sum_shader(csctx, in_textures, frame_count, 1 / (f32)frame_count,
+ ctx->averaged_frame.textures[0], ctx->averaged_frame.dim);
} break;
default: ASSERT(0);
}
@@ -353,16 +420,101 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
}
static void
-check_compute_timers(ComputeShaderCtx *cs, ExportCtx *e, BeamformerParametersFull *bp)
+do_beamform_work(BeamformerCtx *ctx, Arena *a)
+{
+ BeamformerParameters *bp = &ctx->params->raw;
+ BeamformWorkQueue *q = &ctx->beamform_work_queue;
+ BeamformWork *work = beamform_work_queue_pop(q);
+ ComputeShaderCtx *cs = &ctx->csctx;
+
+ while (work) {
+ switch (work->type) {
+ case BW_PARTIAL_COMPUTE: {
+ BeamformFrame *frame = work->compute_ctx.frame;
+
+ if (work->compute_ctx.first_pass) {
+ if (ctx->params->upload) {
+ glNamedBufferSubData(cs->shared_ubo, 0, sizeof(*bp), bp);
+ ctx->params->upload = 0;
+ }
+
+ /* TODO: maybe we should have some concept of compute shader
+ * groups, then we could define a group that does the decoding
+ * and filtering and apply that group directly here. For now
+ * we will do this dumb thing */
+ u32 stage_count = ctx->params->compute_stages_count;
+ enum compute_shaders *stages = ctx->params->compute_stages;
+ for (u32 i = 0; i < stage_count; i++) {
+ if (stages[i] == CS_UFORCES || stages[i] == CS_HERCULES) {
+ /* TODO: this is not a proper solution if we have
+ * more beamforming shaders */
+ ctx->partial_compute_ctx.shader = stages[i];
+ break;
+ }
+ do_compute_shader(ctx, frame,
+ work->compute_ctx.raw_data_ssbo_index,
+ stages[i]);
+ }
+ u32 output_ssbo = ctx->partial_compute_ctx.rf_data_ssbo;
+ u32 input_ssbo = cs->last_output_ssbo_index;
+ size rf_size = decoded_data_size(cs);
+ glCopyNamedBufferSubData(cs->rf_data_ssbos[input_ssbo],
+ output_ssbo, 0, 0, rf_size);
+ }
+
+ b32 done = do_partial_compute_step(ctx, frame);
+ if (!done) {
+ BeamformWork *new;
+ /* NOTE: this push must not fail */
+ new = beamform_work_queue_push(ctx, a, BW_PARTIAL_COMPUTE);
+ new->compute_ctx.first_pass = 0;
+ }
+ } break;
+ case BW_FULL_COMPUTE:
+ case BW_RECOMPUTE: {
+ BeamformFrame *frame = work->compute_ctx.frame;
+
+ if (work->compute_ctx.first_pass) {
+ if (ctx->params->upload) {
+ glNamedBufferSubData(cs->shared_ubo, 0, sizeof(*bp), bp);
+ ctx->params->upload = 0;
+ }
+ }
+
+ u32 stage_count = ctx->params->compute_stages_count;
+ enum compute_shaders *stages = ctx->params->compute_stages;
+ for (u32 i = 0; i < stage_count; i++)
+ do_compute_shader(ctx, frame, work->compute_ctx.raw_data_ssbo_index,
+ stages[i]);
+ ctx->flags |= GEN_MIPMAPS;
+ } break;
+ }
+
+
+ work->next = q->next_free;
+ q->next_free = work;
+ work = beamform_work_queue_pop(q);
+ }
+
+ if (q->did_compute_this_frame) {
+ u32 tidx = ctx->csctx.timer_index;
+ glDeleteSync(ctx->csctx.timer_fences[tidx]);
+ ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences);
+ }
+}
+
+static void
+check_compute_timers(ComputeShaderCtx *cs, PartialComputeCtx *pc, BeamformerParametersFull *bp)
{
/* NOTE: volume generation running timer */
- if (e->state & ES_TIMER_ACTIVE) {
+ if (pc->state & PCS_TIMER_ACTIVE) {
u64 start_ns = 0, end_ns = 0;
- glGetQueryObjectui64v(e->timer_ids[0], GL_QUERY_RESULT, &start_ns);
- glGetQueryObjectui64v(e->timer_ids[1], GL_QUERY_RESULT, &end_ns);
+ glGetQueryObjectui64v(pc->timer_ids[0], GL_QUERY_RESULT, &start_ns);
+ glGetQueryObjectui64v(pc->timer_ids[1], GL_QUERY_RESULT, &end_ns);
u64 elapsed_ns = end_ns - start_ns;
- e->runtime += (f32)elapsed_ns * 1e-9;
- e->state &= ~ES_TIMER_ACTIVE;
+ pc->runtime += (f32)elapsed_ns * 1e-9;
+ pc->state &= ~PCS_TIMER_ACTIVE;
}
/* NOTE: main timers for display portion of the program */
@@ -384,8 +536,10 @@ check_compute_timers(ComputeShaderCtx *cs, ExportCtx *e, BeamformerParametersFul
}
}
+#include "ui.c"
+
DEBUG_EXPORT void
-do_beamformer(BeamformerCtx *ctx, Arena arena)
+do_beamformer(BeamformerCtx *ctx, Arena *arena)
{
dt_for_frame = GetFrameTime();
@@ -395,108 +549,53 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
}
/* NOTE: Store the compute time for the last frame. */
- check_compute_timers(&ctx->csctx, &ctx->export_ctx, ctx->params);
+ check_compute_timers(&ctx->csctx, &ctx->partial_compute_ctx, ctx->params);
BeamformerParameters *bp = &ctx->params->raw;
/* NOTE: Check for and Load RF Data into GPU */
if (ctx->platform.poll_pipe(ctx->data_pipe)) {
- ComputeShaderCtx *cs = &ctx->csctx;
- if (!uv4_equal(cs->dec_data_dim, bp->dec_data_dim))
- alloc_shader_storage(ctx, arena);
-
- if (!uv4_equal(ctx->out_data_dim, bp->output_points))
- alloc_output_image(ctx, arena);
-
- cs->raw_data_index = (cs->raw_data_index + 1) % ARRAY_COUNT(cs->raw_data_fences);
- i32 raw_index = ctx->csctx.raw_data_index;
- /* NOTE: if this times out it means the command queue is more than 3 frames behind.
- * In that case we need to re-evaluate the buffer size */
- if (ctx->csctx.raw_data_fences[raw_index]) {
- i32 result = glClientWaitSync(cs->raw_data_fences[raw_index], 0, 10000);
- if (result == GL_TIMEOUT_EXPIRED) {
- //ASSERT(0);
+ BeamformWork *work = beamform_work_queue_push(ctx, arena, BW_FULL_COMPUTE);
+ /* NOTE: we can only read in the new data if we get back a work item.
+ * otherwise we have too many frames in flight and should wait until the
+ * next frame to try again */
+ if (work) {
+ ComputeShaderCtx *cs = &ctx->csctx;
+ if (!uv4_equal(cs->dec_data_dim, bp->dec_data_dim)) {
+ alloc_shader_storage(ctx, *arena);
+ /* TODO: we may need to invalidate all queue items here */
}
- glDeleteSync(cs->raw_data_fences[raw_index]);
- cs->raw_data_fences[raw_index] = NULL;
- }
- uv2 rf_raw_dim = cs->rf_raw_dim;
- size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
-
- void *rf_data_buf = cs->raw_data_arena.beg + raw_index * rf_raw_size;
- size rlen = ctx->platform.read_pipe(ctx->data_pipe, rf_data_buf, rf_raw_size);
- if (rlen != rf_raw_size) {
- ctx->partial_transfer_count++;
- } else {
- ctx->flags |= DO_COMPUTE;
- switch (ctx->gl.vendor_id) {
- case GL_VENDOR_INTEL:
- /* TODO: intel complains about this buffer being busy even with
- * MAP_UNSYNCHRONIZED_BIT */
- case GL_VENDOR_AMD:
- break;
- case GL_VENDOR_NVIDIA:
- glNamedBufferSubData(cs->raw_data_ssbo, raw_index * rf_raw_size,
- rf_raw_size, rf_data_buf);
+ u32 raw_index = work->compute_ctx.raw_data_ssbo_index;
+ uv2 rf_raw_dim = cs->rf_raw_dim;
+ size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
+ void *rf_data_buf = cs->raw_data_arena.beg + raw_index * rf_raw_size;
+
+ alloc_output_image(ctx, bp->output_points);
+
+ size rlen = ctx->platform.read_pipe(ctx->data_pipe, rf_data_buf, rf_raw_size);
+ if (rlen != rf_raw_size) {
+ stream_append_s8(&ctx->error_stream, s8("Partial Read Occurred: "));
+ stream_append_i64(&ctx->error_stream, rlen);
+ stream_append_byte(&ctx->error_stream, '/');
+ stream_append_i64(&ctx->error_stream, rf_raw_size);
+ stream_append_s8(&ctx->error_stream, s8("\n\0"));
+ TraceLog(LOG_WARNING, (c8 *)stream_to_s8(&ctx->error_stream).data);
+ ctx->error_stream.widx = 0;
+ } else {
+ switch (ctx->gl.vendor_id) {
+ case GL_VENDOR_INTEL:
+ case GL_VENDOR_AMD:
+ break;
+ case GL_VENDOR_NVIDIA:
+ glNamedBufferSubData(cs->raw_data_ssbo, raw_index * rlen,
+ rlen, rf_data_buf);
+ }
}
}
}
- /* NOTE: we are starting a volume computation on this frame so make some space */
- if (ctx->export_ctx.state & ES_START) {
- ExportCtx *e = &ctx->export_ctx;
- e->runtime = 0;
- uv4 edim = e->volume_dim;
-
- /* NOTE: get a timestamp here which will include decoding/demodulating/etc. */
- glQueryCounter(e->timer_ids[0], GL_TIMESTAMP);
- e->state |= ES_TIMER_ACTIVE;
-
- glDeleteTextures(1, &e->volume_texture);
- glCreateTextures(GL_TEXTURE_3D, 1, &e->volume_texture);
- glTextureStorage3D(e->volume_texture, 1, GL_R32F, edim.x, edim.y, edim.z);
- LABEL_GL_OBJECT(GL_TEXTURE, e->volume_texture, s8("Beamformed_Volume"));
-
- glDeleteBuffers(1, &e->rf_data_ssbo);
- glCreateBuffers(1, &e->rf_data_ssbo);
- glNamedBufferStorage(e->rf_data_ssbo, decoded_data_size(&ctx->csctx), 0, 0);
- LABEL_GL_OBJECT(GL_BUFFER, e->rf_data_ssbo, s8("Volume_RF_SSBO"));
- }
-
- if (ctx->flags & DO_COMPUTE || ctx->export_ctx.state & ES_START) {
- if (ctx->params->upload && !(ctx->export_ctx.state & ES_COMPUTING)) {
- glNamedBufferSubData(ctx->csctx.shared_ubo, 0, sizeof(*bp), bp);
- ctx->params->upload = 0;
- }
-
- u32 stages = ctx->params->compute_stages_count;
- for (u32 i = 0; i < stages; i++) {
- do_compute_shader(ctx, ctx->params->compute_stages[i]);
- }
- ctx->flags &= ~DO_COMPUTE;
- ctx->flags |= GEN_MIPMAPS;
-
- u32 tidx = ctx->csctx.timer_index;
- glDeleteSync(ctx->csctx.timer_fences[tidx]);
- ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences);
- }
-
- if (ctx->export_ctx.state & ES_COMPUTING) {
- /* TODO: this could probably be adapted to do FORCES as well */
- b32 done = do_volume_computation_step(ctx, CS_HERCULES);
- if (done) {
- ExportCtx *e = &ctx->export_ctx;
- uv4 dim = e->volume_dim;
- size volume_out_size = dim.x * dim.y * dim.z * sizeof(f32);
- e->volume_buf = ctx->platform.alloc_arena(e->volume_buf, volume_out_size);
- glGetTextureImage(e->volume_texture, 0, GL_RED, GL_FLOAT, volume_out_size,
- e->volume_buf.beg);
- s8 raw = {.len = volume_out_size, .data = e->volume_buf.beg};
- if (!ctx->platform.write_new_file("raw_volume.bin", raw))
- TraceLog(LOG_WARNING, "failed to write output volume\n");
- }
- }
+ ctx->beamform_work_queue.did_compute_this_frame = 0;
+ do_beamform_work(ctx, arena);
/* NOTE: draw output image texture using render fragment shader */
BeginTextureMode(ctx->fsctx.output);
@@ -504,7 +603,15 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
BeginShaderMode(ctx->fsctx.shader);
FragmentShaderCtx *fs = &ctx->fsctx;
glUseProgram(fs->shader.id);
- glBindTextureUnit(0, ctx->out_texture);
+ u32 out_texture = 0;
+ if (bp->output_points.w > 1) {
+ out_texture = ctx->averaged_frame.textures[0];
+ } else {
+ BeamformFrame *f = ctx->beamform_frames + ctx->displayed_frame_index;
+ /* NOTE: verify we have actually beamformed something yet */
+ if (f->dim.w) out_texture = f->textures[f->dim.w - 1];
+ }
+ glBindTextureUnit(0, out_texture);
glUniform1f(fs->db_cutoff_id, fs->db);
glUniform1f(fs->threshold_id, fs->threshold);
DrawTexture(fs->output.texture, 0, 0, WHITE);
@@ -520,7 +627,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
ctx->flags &= ~GEN_MIPMAPS;
}
- draw_ui(ctx, arena);
+ draw_ui(ctx, *arena);
if (IsKeyPressed(KEY_R))
ctx->flags |= RELOAD_SHADERS;
diff --git a/beamformer.h b/beamformer.h
@@ -29,8 +29,7 @@
enum program_flags {
SHOULD_EXIT = 1 << 0,
RELOAD_SHADERS = 1 << 1,
- GEN_MIPMAPS = 1 << 29,
- DO_COMPUTE = 1 << 30,
+ GEN_MIPMAPS = 1 << 30,
};
enum gl_vendor_ids {
@@ -128,14 +127,6 @@ typedef struct {
GLsync timer_fences[MAX_FRAMES_IN_FLIGHT];
f32 last_frame_time[CS_LAST];
- /* NOTE: circular buffer of textures for averaging.
- * Only allocated up to configured frame average count */
- u32 sum_textures[16];
- u32 sum_texture_index;
-
- /* NOTE: array output textures. Only allocated up to configured array count */
- u32 array_textures[4];
-
/* NOTE: the raw_data_ssbo is allocated at 3x the required size to allow for tiled
* transfers when the GPU is running behind the CPU. It is not mapped on NVIDIA because
* their drivers _will_ store the buffer in the system memory. This doesn't happen
@@ -174,22 +165,32 @@ typedef struct {
f32 threshold;
} FragmentShaderCtx;
-enum export_state {
- ES_START = (1 << 0),
- ES_COMPUTING = (1 << 1),
- ES_TIMER_ACTIVE = (1 << 2),
+enum {
+ PCS_COMPUTING,
+ PCS_TIMER_ACTIVE,
};
typedef struct {
- Arena volume_buf;
+ /* NOTE: we always have one extra texture to sum into; thus the final output data
+ * is always found in textures[dim.w - 1] */
+ u32 textures[MAX_MULTI_XDC_COUNT + 1];
+ uv4 dim;
+ u32 mips;
+} BeamformFrame;
+
+typedef struct {
+ /* TODO: possibly both of these should be stored elsewhere */
+ Arena export_buf;
uv4 volume_dim;
+
+ BeamformFrame frame;
u32 timer_ids[2];
f32 runtime;
- u32 volume_texture;
u32 rf_data_ssbo;
- u32 state;
+ u32 shader;
u32 dispatch_index;
-} ExportCtx;
+ u32 state;
+} PartialComputeCtx;
typedef struct {
enum gl_vendor_ids vendor_id;
@@ -201,6 +202,49 @@ typedef struct {
i32 max_ubo_size;
} GLParams;
+enum beamform_work {
+ BW_FULL_COMPUTE,
+ BW_RECOMPUTE,
+ BW_PARTIAL_COMPUTE,
+ BW_SAVE_FRAME,
+ BW_SEND_FRAME,
+ BW_SSBO_COPY,
+};
+
+typedef struct {
+ u32 source_ssbo;
+ u32 dest_ssbo;
+} BeamformSSBOCopy;
+
+typedef struct {
+ BeamformFrame *frame;
+ u32 raw_data_ssbo_index;
+ b32 first_pass;
+} BeamformCompute;
+
+typedef struct {
+ BeamformFrame *frame;
+ iptr output_handle;
+} BeamformOutputFrame;
+
+/* NOTE: discriminated union based on type */
+typedef struct BeamformWork {
+ struct BeamformWork *next;
+ union {
+ BeamformSSBOCopy ssbo_copy_ctx;
+ BeamformCompute compute_ctx;
+ BeamformOutputFrame output_frame_ctx;
+ };
+ u32 type;
+} BeamformWork;
+
+typedef struct {
+ BeamformWork *first;
+ BeamformWork *last;
+ BeamformWork *next_free;
+ b32 did_compute_this_frame;
+} BeamformWorkQueue;
+
typedef struct BeamformerCtx {
GLParams gl;
@@ -213,13 +257,14 @@ typedef struct BeamformerCtx {
InputState is;
- uv4 out_data_dim;
- u32 out_texture;
- u32 out_texture_mips;
+ BeamformFrame beamform_frames[MAX_BEAMFORMED_SAVED_FRAMES];
+ u32 displayed_frame_index;
+ /* NOTE: this will only be used when we are averaging */
+ BeamformFrame averaged_frame;
ComputeShaderCtx csctx;
FragmentShaderCtx fsctx;
- ExportCtx export_ctx;
+ PartialComputeCtx partial_compute_ctx;
Pipe data_pipe;
u32 partial_transfer_count;
@@ -228,9 +273,11 @@ typedef struct BeamformerCtx {
Platform platform;
Stream error_stream;
+ BeamformWorkQueue beamform_work_queue;
+
BeamformerParametersFull *params;
} BeamformerCtx;
-#define LABEL_GL_OBJECT(type, id, s) glObjectLabel(type, id, (s).len, (c8 *)(s).data)
+#define LABEL_GL_OBJECT(type, id, s) {s8 _s = (s); glObjectLabel(type, id, _s.len, (c8 *)_s.data);}
#endif /*_BEAMFORMER_H_ */
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -11,14 +11,16 @@ enum compute_shaders {
CS_LAST
};
+#define MAX_BEAMFORMED_SAVED_FRAMES 16
+#define MAX_MULTI_XDC_COUNT 4
/* NOTE: This struct follows the OpenGL std140 layout. DO NOT modify unless you have
* read and understood the rules, particulary with regards to _member alignment_ */
typedef struct {
u16 channel_mapping[512]; /* Transducer Channel to Verasonics Channel */
u32 uforces_channels[128]; /* Channels used for virtual UFORCES elements */
- f32 xdc_origin[16]; /* [m] (4 v4s) Corner of transducer being treated as origin */
- f32 xdc_corner1[16]; /* [m] (4 v4s) Corner of transducer along first axis (arbitrary) */
- f32 xdc_corner2[16]; /* [m] (4 v4s) Corner of transducer along second axis (arbitrary) */
+ f32 xdc_origin[4 * MAX_MULTI_XDC_COUNT]; /* [m] Corner of transducer being treated as origin */
+ f32 xdc_corner1[4 * MAX_MULTI_XDC_COUNT]; /* [m] Corner of transducer along first axis */
+ f32 xdc_corner2[4 * MAX_MULTI_XDC_COUNT]; /* [m] Corner of transducer along second axis */
uv4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
uv4 output_points; /* Width * Height * Depth * (Frame Average Count) */
v4 output_min_coordinate; /* [m] Back-Top-Left corner of output region (w ignored) */
@@ -36,15 +38,19 @@ typedef struct {
f32 _pad[1];
} BeamformerParameters;
+/* NOTE: garbage to get the preprocessor to properly stringize the value of a macro */
+#define str_(x) #x
+#define str(x) str_(x)
+
#define COMPUTE_SHADER_HEADER "\
#version 460 core\n\
\n\
layout(std140, binding = 0) uniform parameters {\n\
uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */\n\
uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */\n\
- vec4 xdc_origin[4]; /* [m] Corner of transducer being treated as origin */\n\
- vec4 xdc_corner1[4]; /* [m] Corner of transducer along first axis (arbitrary) */\n\
- vec4 xdc_corner2[4]; /* [m] Corner of transducer along second axis (arbitrary) */\n\
+ vec4 xdc_origin[" str(MAX_MULTI_XDC_COUNT) "]; /* [m] Corner of transducer being treated as origin */\n\
+ vec4 xdc_corner1[" str(MAX_MULTI_XDC_COUNT) "]; /* [m] Corner of transducer along first axis (arbitrary) */\n\
+ vec4 xdc_corner2[" str(MAX_MULTI_XDC_COUNT) "]; /* [m] Corner of transducer along second axis (arbitrary) */\n\
uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */\n\
uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */\n\
vec4 output_min_coord; /* [m] Top left corner of output region */\n\
diff --git a/main_generic.c b/main_generic.c
@@ -44,7 +44,7 @@ main(void)
setup_beamformer(&ctx, temp_memory);
while(!(ctx.flags & SHOULD_EXIT)) {
- do_program_step(&ctx, temp_memory);
+ do_program_step(&ctx, &temp_memory);
}
/* NOTE: make sure this will get cleaned up after external
diff --git a/static.c b/static.c
@@ -20,7 +20,8 @@ static struct {
#else
static void *debug_lib;
-typedef void do_beamformer_fn(BeamformerCtx *, Arena);
+/* TODO: move this to a header */
+typedef void do_beamformer_fn(BeamformerCtx *, Arena *);
static do_beamformer_fn *do_beamformer;
static void
@@ -173,13 +174,6 @@ compile_shader(Arena a, u32 type, s8 shader)
}
static void
-init_fragment_shader_ctx(FragmentShaderCtx *ctx, uv4 out_data_dim)
-{
- ctx->db = -50.0f;
- ctx->threshold = 40.0f;
-}
-
-static void
reload_shaders(BeamformerCtx *ctx, Arena a)
{
ComputeShaderCtx *csctx = &ctx->csctx;
@@ -211,7 +205,6 @@ reload_shaders(BeamformerCtx *ctx, Arena a)
glDeleteProgram(csctx->programs[i]);
csctx->programs[i] = rlLoadComputeShaderProgram(shader_id);
LABEL_GL_OBJECT(GL_PROGRAM, csctx->programs[i], compute_shaders[i].label);
- ctx->flags |= DO_COMPUTE;
}
glDeleteShader(shader_id);
@@ -262,8 +255,7 @@ setup_beamformer(BeamformerCtx *ctx, Arena temp_memory)
{
ctx->window_size = (uv2){.w = 1280, .h = 840};
- ctx->out_data_dim = (uv4){.x = 1, .y = 1, .z = 1};
- ctx->export_ctx.volume_dim = (uv4){.x = 1, .y = 1, .z = 1};
+ ctx->partial_compute_ctx.volume_dim = (uv4){.x = 1, .y = 1, .z = 1};
SetConfigFlags(FLAG_VSYNC_HINT);
InitWindow(ctx->window_size.w, ctx->window_size.h, "OGL Beamformer");
@@ -280,7 +272,8 @@ setup_beamformer(BeamformerCtx *ctx, Arena temp_memory)
ctx->font = LoadFontEx("assets/IBMPlexSans-Bold.ttf", 28, 0, 0);
ctx->small_font = LoadFontEx("assets/IBMPlexSans-Bold.ttf", 22, 0, 0);
- init_fragment_shader_ctx(&ctx->fsctx, ctx->out_data_dim);
+ ctx->fsctx.db = -50.0f;
+ ctx->fsctx.threshold = 40.0f;
ctx->data_pipe = os_open_named_pipe(OS_PIPE_NAME);
ctx->params = os_open_shared_memory_area(OS_SMEM_NAME, sizeof(ctx->params));
@@ -288,7 +281,6 @@ setup_beamformer(BeamformerCtx *ctx, Arena temp_memory)
ASSERT(ctx->data_pipe.file != INVALID_FILE);
ASSERT(ctx->params);
- ctx->params->raw.output_points = ctx->out_data_dim;
/* NOTE: default compute shader pipeline */
ctx->params->compute_stages[0] = CS_HADAMARD;
ctx->params->compute_stages[1] = CS_DEMOD;
@@ -311,15 +303,13 @@ setup_beamformer(BeamformerCtx *ctx, Arena temp_memory)
LABEL_GL_OBJECT(GL_BUFFER, ctx->csctx.shared_ubo, s8("Beamformer_Parameters"));
glGenQueries(ARRAY_COUNT(ctx->csctx.timer_fences) * CS_LAST, (u32 *)ctx->csctx.timer_ids);
- glGenQueries(ARRAY_COUNT(ctx->export_ctx.timer_ids), ctx->export_ctx.timer_ids);
+ glGenQueries(ARRAY_COUNT(ctx->partial_compute_ctx.timer_ids), ctx->partial_compute_ctx.timer_ids);
- /* NOTE: do not DO_COMPUTE on first frame */
reload_shaders(ctx, temp_memory);
- ctx->flags &= ~DO_COMPUTE;
}
static void
-do_program_step(BeamformerCtx *ctx, Arena temp_memory)
+do_program_step(BeamformerCtx *ctx, Arena *memory)
{
do_debug(&ctx->error_stream);
if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA)
@@ -327,8 +317,8 @@ do_program_step(BeamformerCtx *ctx, Arena temp_memory)
if (ctx->flags & RELOAD_SHADERS) {
ctx->flags &= ~RELOAD_SHADERS;
- reload_shaders(ctx, temp_memory);
+ reload_shaders(ctx, *memory);
}
- do_beamformer(ctx, temp_memory);
+ do_beamformer(ctx, memory);
}
diff --git a/ui.c b/ui.c
@@ -2,10 +2,13 @@
static void
ui_start_compute(BeamformerCtx *ctx)
{
- ctx->flags |= DO_COMPUTE;
- if (ctx->params->raw.output_points.w > 1) {
- for (u32 i = 0; i < ctx->params->raw.output_points.w; i++)
- glClearTexImage(ctx->csctx.sum_textures[i], 0, GL_RED, GL_FLOAT, 0);
+	/* NOTE: the ui is not allowed to start new work until previously queued work has completed */
+ Arena a = {0};
+ beamform_work_queue_push(ctx, &a, BW_RECOMPUTE);
+ for (u32 i = 0; i < ARRAY_COUNT(ctx->beamform_frames); i++) {
+ BeamformFrame *frame = ctx->beamform_frames + i;
+ if (frame->dim.w && frame->textures[frame->dim.w - 1])
+ glClearTexImage(frame->textures[frame->dim.w - 1], 0, GL_RED, GL_FLOAT, 0);
}
ctx->params->upload = 1;
}
@@ -560,19 +563,21 @@ draw_settings_ui(BeamformerCtx *ctx, Arena arena, Rect r, v2 mouse)
draw_r.pos.y += 2 * LISTING_LINE_PAD;
draw_r.size.y -= 2 * LISTING_LINE_PAD;
- bmv = (BPModifiableValue){&ctx->export_ctx.volume_dim.x, bmv_store_power_of_two,
+ #if 0
+ /* TODO: work this into the work queue */
+ bmv = (BPModifiableValue){&ctx->partial_compute_ctx.volume_dim.x, bmv_store_power_of_two,
.ilimits = (iv2){.x = 1, .y = ctx->gl.max_3d_texture_dim},
MV_INT, 1, 1};
draw_r = do_text_input_listing(s8("Export Dimension X:"), s8(""), bmv, ctx, arena,
draw_r, mouse, hover_t + idx++);
- bmv = (BPModifiableValue){&ctx->export_ctx.volume_dim.y, bmv_store_power_of_two,
+ bmv = (BPModifiableValue){&ctx->partial_compute_ctx.volume_dim.y, bmv_store_power_of_two,
.ilimits = (iv2){.x = 1, .y = ctx->gl.max_3d_texture_dim},
MV_INT, 1, 1};
draw_r = do_text_input_listing(s8("Export Dimension Y:"), s8(""), bmv, ctx, arena,
draw_r, mouse, hover_t + idx++);
- bmv = (BPModifiableValue){&ctx->export_ctx.volume_dim.z, bmv_store_power_of_two,
+ bmv = (BPModifiableValue){&ctx->partial_compute_ctx.volume_dim.z, bmv_store_power_of_two,
.ilimits = (iv2){.x = 1, .y = ctx->gl.max_3d_texture_dim},
MV_INT, 1, 1};
draw_r = do_text_input_listing(s8("Export Dimension Z:"), s8(""), bmv, ctx, arena,
@@ -582,11 +587,10 @@ draw_settings_ui(BeamformerCtx *ctx, Arena arena, Rect r, v2 mouse)
btn_r.size.h = ctx->font.baseSize * 1.3;
btn_r.size.w *= 0.6;
if (do_text_button(ctx, s8("Dump Raw Volume"), btn_r, mouse, hover_t + idx++)) {
- if (!ctx->export_ctx.state) {
- ctx->export_ctx.state = ES_START;
- ctx->flags |= DO_COMPUTE;
+ if (!ctx->partial_compute_ctx.state) {
}
}
+ #endif
/* NOTE: if C compilers didn't suck this would be a static assert */
ASSERT(idx <= ARRAY_COUNT(hover_t));
@@ -631,7 +635,7 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r)
}
static s8 totals[2] = {s8("Compute Total:"), s8("Volume Total:")};
- f32 times[2] = {compute_time_sum, ctx->export_ctx.runtime};
+ f32 times[2] = {compute_time_sum, ctx->partial_compute_ctx.runtime};
for (u32 i = 0; i < ARRAY_COUNT(totals); i++) {
pos.y -= measure_text(ctx->font, totals[i]).y;
draw_text(ctx->font, totals[i], pos, 0, colour_from_normalized(FG_COLOUR));
diff --git a/util.c b/util.c
@@ -47,6 +47,10 @@ mem_move(u8 *src, u8 *dest, size n)
static void *
alloc_(Arena *a, size len, size align, size count)
{
+ /* NOTE: special case 0 arena */
+ if (a->beg == 0)
+ return 0;
+
size padding = -(uintptr_t)a->beg & (align - 1);
size available = a->end - a->beg - padding;
if (available < 0 || count > available / len)
diff --git a/util.h b/util.h
@@ -11,6 +11,10 @@
#define asm __asm__
#endif
+#ifndef typeof
+#define typeof __typeof__
+#endif
+
#ifndef unreachable
#ifdef _MSC_VER
#define unreachable() __assume(0)
@@ -62,6 +66,7 @@ typedef ptrdiff_t size;
typedef ptrdiff_t iptr;
typedef struct { u8 *beg, *end; } Arena;
+typedef struct { Arena *arena; u8 *old_beg; } TempArena;
typedef struct { size len; u8 *data; } s8;
#define s8(s) (s8){.len = ARRAY_COUNT(s) - 1, .data = (u8 *)s}
@@ -79,13 +84,28 @@ typedef union {
} iv2;
typedef union {
+ struct { i32 x, y, z; };
+ struct { i32 w, h, d; };
+ iv2 xy;
+ i32 E[3];
+} iv3;
+
+typedef union {
struct { u32 x, y; };
struct { u32 w, h; };
u32 E[2];
} uv2;
typedef union {
+ struct { u32 x, y, z; };
+ struct { u32 w, h, d; };
+ uv2 xy;
+ u32 E[3];
+} uv3;
+
+typedef union {
struct { u32 x, y, z, w; };
+ uv3 xyz;
u32 E[4];
} uv4;