Commit: 9130418de20f2674c3152ae6ef4bafaba43b6a00
Parent: 358a6695b6f36e4ae9425bf8ce1dcf07e9c8df9a
Author: Randy Palamar
Date: Sun, 22 Jun 2025 12:21:30 -0600
core/lib: use shared memory for export, add compute stats export
This touches many files because deleting the export pipe means
that a large portion of other trash can be removed. It also
greatly shrinks the platform layer surface area.
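The library now reads exported data directly out of the shared
memory scratch region. As an illustrative sketch (not part of this
diff; setup and error handling elided), fetching the new compute
timings looks like:

    BeamformerComputeStatsTable stats;
    if (beamformer_compute_timings(&stats, 1000 /* timeout [ms] */)) {
        /* stats.times[i][shader] holds per-frame shader times in
         * seconds; stats.rf_time_deltas[i] holds seconds between
         * successive RF data uploads */
    }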
Diffstat:
16 files changed, 291 insertions(+), 424 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -160,6 +160,20 @@ push_compute_timing_info(ComputeTimingTable *t, ComputeTimingInfo info)
t->buffer[index] = info;
}
+function BeamformComputeFrame *
+beamformer_get_newest_frame(BeamformerCtx *ctx, b32 average_frame)
+{
+ BeamformComputeFrame *result = 0;
+ if (average_frame) {
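+ /* NOTE: averaged frames are a two-entry ping-pong; the completed
+  * frame sits at the opposite index from the one being written */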
+ u32 a_index = !(ctx->averaged_frame_index % countof(ctx->averaged_frames));
+ result = ctx->averaged_frames + a_index;
+ } else {
+ u32 index = (ctx->next_render_frame_index - 1) % countof(ctx->beamform_frames);
+ result = ctx->beamform_frames + index;
+ }
+ return result;
+}
+
function b32
fill_frame_compute_work(BeamformerCtx *ctx, BeamformWork *work, ImagePlaneTag plane)
{
@@ -168,7 +182,7 @@ fill_frame_compute_work(BeamformerCtx *ctx, BeamformWork *work, ImagePlaneTag pl
result = 1;
u32 frame_id = atomic_add_u32(&ctx->next_render_frame_index, 1);
u32 frame_index = frame_id % countof(ctx->beamform_frames);
- work->type = BW_COMPUTE;
+ work->kind = BeamformerWorkKind_Compute;
work->lock = BeamformerSharedMemoryLockKind_DispatchCompute;
work->frame = ctx->beamform_frames + frame_index;
work->frame->ready_to_present = 0;
@@ -179,19 +193,6 @@ fill_frame_compute_work(BeamformerCtx *ctx, BeamformWork *work, ImagePlaneTag pl
}
function void
-export_frame(BeamformerCtx *ctx, iptr handle, BeamformFrame *frame)
-{
- uv3 dim = frame->dim;
- iz out_size = dim.x * dim.y * dim.z * 2 * sizeof(f32);
- ctx->export_buffer = ctx->os.alloc_arena(ctx->export_buffer, out_size);
- glGetTextureImage(frame->texture, 0, GL_RG, GL_FLOAT, out_size, ctx->export_buffer.beg);
- s8 raw = {.len = out_size, .data = ctx->export_buffer.beg};
- if (!ctx->os.write_file(handle, raw))
- ctx->os.write_file(ctx->os.error_handle, s8("failed to export frame\n"));
- ctx->os.close(handle);
-}
-
-function void
do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, f32 in_scale,
u32 out_texture, uv3 out_data_dim)
{
@@ -200,7 +201,7 @@ do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, f32
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F);
- glProgramUniform1f(cs->programs[ShaderKind_Sum], CS_SUM_PRESCALE_UNIFORM_LOC, in_scale);
+ glProgramUniform1f(cs->programs[BeamformerShaderKind_Sum], CS_SUM_PRESCALE_UNIFORM_LOC, in_scale);
for (u32 i = 0; i < in_texture_count; i++) {
glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
glDispatchCompute(ORONE(out_data_dim.x / 32),
@@ -277,7 +278,7 @@ compute_cursor_finished(struct compute_cursor *cursor)
}
function void
-do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame, ShaderKind shader)
+do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame, BeamformerShaderKind shader)
{
ComputeShaderCtx *csctx = &ctx->csctx;
BeamformerSharedMemory *sm = ctx->shared_memory.region;
@@ -288,9 +289,9 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame,
u32 input_ssbo_idx = csctx->last_output_ssbo_index;
switch (shader) {
- case ShaderKind_Decode:
- case ShaderKind_DecodeFloat:
- case ShaderKind_DecodeFloatComplex:{
+ case BeamformerShaderKind_Decode:
+ case BeamformerShaderKind_DecodeFloat:
+ case BeamformerShaderKind_DecodeFloatComplex:{
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
glBindImageTexture(0, csctx->hadamard_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8I);
@@ -300,15 +301,15 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame,
ceil_f32((f32)csctx->dec_data_dim.z / DECODE_LOCAL_SIZE_Z));
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
}break;
- case ShaderKind_CudaDecode:{
+ case BeamformerShaderKind_CudaDecode:{
ctx->cuda_lib.decode(0, output_ssbo_idx, 0);
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
}break;
- case ShaderKind_CudaHilbert:
+ case BeamformerShaderKind_CudaHilbert:
ctx->cuda_lib.hilbert(input_ssbo_idx, output_ssbo_idx);
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
break;
- case ShaderKind_Demodulate:{
+ case BeamformerShaderKind_Demodulate:{
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
@@ -316,7 +317,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame,
ORONE(csctx->dec_data_dim.z));
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
}break;
- case ShaderKind_MinMax:{
+ case BeamformerShaderKind_MinMax:{
u32 texture = frame->frame.texture;
for (u32 i = 1; i < frame->frame.mips; i++) {
glBindImageTexture(0, texture, i - 1, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
@@ -330,7 +331,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame,
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
}break;
- case ShaderKind_DASCompute:{
+ case BeamformerShaderKind_DASCompute:{
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
glBindImageTexture(0, frame->frame.texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
glBindImageTexture(1, csctx->sparse_elements_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16I);
@@ -367,7 +368,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformComputeFrame *frame,
#endif
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}break;
- case ShaderKind_Sum:{
+ case BeamformerShaderKind_Sum:{
u32 aframe_index = ctx->averaged_frame_index % ARRAY_COUNT(ctx->averaged_frames);
BeamformComputeFrame *aframe = ctx->averaged_frames + aframe_index;
aframe->ready_to_present = 0;
@@ -405,7 +406,7 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena)
stream_append_s8s(&sb, s8("#version 460 core\n\n"), ctx->header);
switch (ctx->kind) {
- case ShaderKind_DASCompute:{
+ case BeamformerShaderKind_DASCompute:{
#define X(type, id, pretty, fixed_tx) "#define DAS_ID_" #type " " #id "\n"
stream_append_s8(&sb, s8(""
"layout(local_size_x = " str(DAS_LOCAL_SIZE_X) ", "
@@ -417,14 +418,14 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena)
));
#undef X
}break;
- case ShaderKind_DecodeFloat:
- case ShaderKind_DecodeFloatComplex:{
- if (ctx->kind == ShaderKind_DecodeFloat)
+ case BeamformerShaderKind_DecodeFloat:
+ case BeamformerShaderKind_DecodeFloatComplex:{
+ if (ctx->kind == BeamformerShaderKind_DecodeFloat)
stream_append_s8(&sb, s8("#define INPUT_DATA_TYPE_FLOAT\n\n"));
else
stream_append_s8(&sb, s8("#define INPUT_DATA_TYPE_FLOAT_COMPLEX\n\n"));
} /* FALLTHROUGH */
- case ShaderKind_Decode:{
+ case BeamformerShaderKind_Decode:{
#define X(type, id, pretty) "#define DECODE_MODE_" #type " " #id "\n"
stream_append_s8(&sb, s8(""
"layout(local_size_x = " str(DECODE_LOCAL_SIZE_X) ", "
@@ -434,11 +435,11 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena)
));
#undef X
}break;
- case ShaderKind_MinMax:{
+ case BeamformerShaderKind_MinMax:{
stream_append_s8(&sb, s8("layout(location = " str(CS_MIN_MAX_MIPS_LEVEL_UNIFORM_LOC)
") uniform int u_mip_map;\n\n"));
}break;
- case ShaderKind_Sum:{
+ case BeamformerShaderKind_Sum:{
stream_append_s8(&sb, s8("layout(location = " str(CS_SUM_PRESCALE_UNIFORM_LOC)
") uniform float u_sum_prescale = 1.0;\n\n"));
}break;
@@ -477,7 +478,7 @@ DEBUG_EXPORT BEAMFORMER_RELOAD_SHADER_FN(beamformer_reload_shader)
if (new_program) {
glDeleteProgram(*src->shader);
*src->shader = new_program;
- if (src->kind == ShaderKind_Render2D) ctx->frame_view_render_context.updated = 1;
+ if (src->kind == BeamformerShaderKind_Render2D) ctx->frame_view_render_context.updated = 1;
}
return new_program != 0;
}
@@ -506,32 +507,62 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
BeamformWork *work = beamform_work_queue_pop(q);
while (work) {
b32 can_commit = 1;
- switch (work->type) {
- case BW_RELOAD_SHADER: {
+ switch (work->kind) {
+ case BeamformerWorkKind_ReloadShader:{
ShaderReloadContext *src = work->shader_reload_context;
b32 success = reload_compute_shader(ctx, src, s8(""), arena);
- if (src->kind == ShaderKind_Decode) {
+ if (src->kind == BeamformerShaderKind_Decode) {
/* TODO(rnp): think of a better way of doing this */
- src->kind = ShaderKind_DecodeFloatComplex;
- src->shader = cs->programs + ShaderKind_DecodeFloatComplex;
+ src->kind = BeamformerShaderKind_DecodeFloatComplex;
+ src->shader = cs->programs + BeamformerShaderKind_DecodeFloatComplex;
success &= reload_compute_shader(ctx, src, s8(" (F32C)"), arena);
- src->kind = ShaderKind_DecodeFloat;
- src->shader = cs->programs + ShaderKind_DecodeFloat;
+ src->kind = BeamformerShaderKind_DecodeFloat;
+ src->shader = cs->programs + BeamformerShaderKind_DecodeFloat;
success &= reload_compute_shader(ctx, src, s8(" (F32)"), arena);
- src->kind = ShaderKind_Decode;
- src->shader = cs->programs + ShaderKind_Decode;
+ src->kind = BeamformerShaderKind_Decode;
+ src->shader = cs->programs + BeamformerShaderKind_Decode;
}
- if (success) {
+ if (success && ctx->csctx.raw_data_ssbo) {
/* TODO(rnp): this check seems off */
- if (ctx->csctx.raw_data_ssbo) {
- can_commit = 0;
- ImagePlaneTag plane = ctx->beamform_frames[ctx->display_frame_index].image_plane_tag;
- fill_frame_compute_work(ctx, work, plane);
+ can_commit = 0;
+ BeamformComputeFrame *frame = beamformer_get_newest_frame(ctx, bp->output_points[3] > 1);
+ fill_frame_compute_work(ctx, work, frame->image_plane_tag);
+ }
+ }break;
+ case BeamformerWorkKind_ExportBuffer:{
+ /* TODO(rnp): better way of handling DispatchCompute barrier */
+ post_sync_barrier(&ctx->shared_memory, BeamformerSharedMemoryLockKind_DispatchCompute,
+ sm->locks, ctx->os.shared_memory_region_unlock);
+ ctx->os.shared_memory_region_lock(&ctx->shared_memory, sm->locks, (i32)work->lock, -1);
+ BeamformerExportContext *ec = &work->export_context;
+ switch (ec->kind) {
+ case BeamformerExportKind_BeamformedData:{
+ BeamformComputeFrame *frame = beamformer_get_newest_frame(ctx, bp->output_points[3] > 1);
+ assert(frame->ready_to_present);
+ u32 texture = frame->frame.texture;
+ uv3 dim = frame->frame.dim;
+ iz out_size = dim.x * dim.y * dim.z * 2 * sizeof(f32);
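+ /* NOTE: read the beamformed volume directly into the shared memory
+  * scratch region; the client copies it out once ExportSync posts */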
+ if (out_size <= ec->size) {
+ glGetTextureImage(texture, 0, GL_RG, GL_FLOAT, out_size,
+ (u8 *)sm + BEAMFORMER_SCRATCH_OFF);
}
+ }break;
+ case BeamformerExportKind_Stats:{
+ ComputeTimingTable *table = ctx->compute_timing_table;
+ /* NOTE(rnp): do a little spin to let this finish updating */
+ while (table->write_index != atomic_load_u32(&table->read_index));
+ ComputeShaderStats *stats = ctx->compute_shader_stats;
+ if (sizeof(stats->table) <= ec->size)
+ mem_copy((u8 *)sm + BEAMFORMER_SCRATCH_OFF, &stats->table, sizeof(stats->table));
+ }break;
+ InvalidDefaultCase;
}
- } break;
- case BW_UPLOAD_BUFFER: {
+ ctx->os.shared_memory_region_unlock(&ctx->shared_memory, sm->locks, (i32)work->lock);
+ post_sync_barrier(&ctx->shared_memory, BeamformerSharedMemoryLockKind_ExportSync, sm->locks,
+ ctx->os.shared_memory_region_unlock);
+ }break;
+ case BeamformerWorkKind_UploadBuffer:{
ctx->os.shared_memory_region_lock(&ctx->shared_memory, sm->locks, (i32)work->lock, -1);
BeamformerUploadContext *uc = &work->upload_context;
u32 tex_type, tex_format, tex_element_count, tex_1d = 0, buffer = 0;
@@ -589,18 +620,14 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
atomic_and_u32(&sm->dirty_regions, ~(sm->dirty_regions & 1 << (work->lock - 1)));
ctx->os.shared_memory_region_unlock(&ctx->shared_memory, sm->locks, (i32)work->lock);
- } break;
- case BW_COMPUTE_INDIRECT:{
+ }break;
+ case BeamformerWorkKind_ComputeIndirect:{
fill_frame_compute_work(ctx, work, work->compute_indirect_plane);
- DEBUG_DECL(work->type = BW_COMPUTE_INDIRECT;)
+ DEBUG_DECL(work->kind = BeamformerWorkKind_ComputeIndirect;)
} /* FALLTHROUGH */
- case BW_COMPUTE:{
- /* NOTE(rnp): debug: here it is not a bug to release the lock if it
- * isn't held but elswhere it is */
- DEBUG_DECL(if (sm->locks[work->lock])) {
- ctx->os.shared_memory_region_unlock(&ctx->shared_memory,
- sm->locks, work->lock);
- }
+ case BeamformerWorkKind_Compute:{
+ post_sync_barrier(&ctx->shared_memory, work->lock, sm->locks,
+ ctx->os.shared_memory_region_unlock);
push_compute_timing_info(ctx->compute_timing_table,
(ComputeTimingInfo){.kind = ComputeTimingInfoKind_ComputeFrameBegin});
@@ -628,7 +655,6 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
}
}
- frame->in_flight = 1;
frame->frame.min_coordinate = v4_from_f32_array(bp->output_min_coordinate);
frame->frame.max_coordinate = v4_from_f32_array(bp->output_max_coordinate);
frame->frame.das_shader_kind = bp->das_shader_id;
@@ -636,11 +662,11 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
b32 did_sum_shader = 0;
u32 stage_count = sm->compute_stages_count;
- ComputeShaderKind *stages = sm->compute_stages;
+ BeamformerShaderKind *stages = sm->compute_stages;
for (u32 i = 0; i < stage_count; i++) {
- did_sum_shader |= stages[i] == ComputeShaderKind_Sum;
+ did_sum_shader |= stages[i] == BeamformerShaderKind_Sum;
glBeginQuery(GL_TIME_ELAPSED, cs->shader_timer_ids[i]);
- do_compute_shader(ctx, arena, frame, (ShaderKind)stages[i]);
+ do_compute_shader(ctx, arena, frame, stages[i]);
glEndQuery(GL_TIME_ELAPSED);
}
/* NOTE(rnp): block until work completes so that we can record timings */
@@ -650,7 +676,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
for (u32 i = 0; i < stage_count; i++) {
ComputeTimingInfo info = {0};
info.kind = ComputeTimingInfoKind_Shader;
- info.shader = (ShaderKind)stages[i];
+ info.shader = stages[i];
glGetQueryObjectui64v(cs->shader_timer_ids[i], GL_QUERY_RESULT, &info.timer_count);
push_compute_timing_info(ctx->compute_timing_table, info);
}
@@ -668,16 +694,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
(ComputeTimingInfo){.kind = ComputeTimingInfoKind_ComputeFrameEnd});
end_renderdoc_capture(gl_context);
- } break;
- case BW_SAVE_FRAME: {
- BeamformComputeFrame *frame = work->output_frame_ctx.frame;
- if (frame->ready_to_present) {
- export_frame(ctx, work->output_frame_ctx.file_handle, &frame->frame);
- } else {
- /* TODO(rnp): should we handle this? */
- INVALID_CODE_PATH;
- }
- } break;
+ }break;
InvalidDefaultCase;
}
@@ -695,55 +712,57 @@ coalesce_timing_table(ComputeTimingTable *t, ComputeShaderStats *stats)
* info item. this could result in garbage entries but they shouldn't really matter */
u32 target = atomic_load_u32(&t->write_index);
- u32 stats_index = (stats->latest_frame_index + 1) % countof(stats->times);
+ u32 stats_index = (stats->latest_frame_index + 1) % countof(stats->table.times);
- static_assert(ShaderKind_Count + 1 <= 32, "timing coalescence bitfield test");
+ static_assert(BeamformerShaderKind_Count + 1 <= 32, "timing coalescence bitfield test");
u32 seen_info_test = 0;
while (t->read_index != target) {
- ComputeTimingInfo info = t->buffer[(t->read_index++) % countof(t->buffer)];
+ ComputeTimingInfo info = t->buffer[t->read_index % countof(t->buffer)];
switch (info.kind) {
case ComputeTimingInfoKind_ComputeFrameBegin:{
assert(t->compute_frame_active == 0);
t->compute_frame_active = 1;
/* NOTE(rnp): allow multiple instances of same shader to accumulate */
- mem_clear(stats->times[stats_index], 0, sizeof(stats->times[stats_index]));
+ mem_clear(stats->table.times[stats_index], 0, sizeof(stats->table.times[stats_index]));
}break;
case ComputeTimingInfoKind_ComputeFrameEnd:{
assert(t->compute_frame_active == 1);
t->compute_frame_active = 0;
stats->latest_frame_index = stats_index;
- stats_index = (stats_index + 1) % countof(stats->times);
+ stats_index = (stats_index + 1) % countof(stats->table.times);
}break;
case ComputeTimingInfoKind_Shader:{
- stats->times[stats_index][info.shader] += (f32)info.timer_count / 1.0e9;
+ stats->table.times[stats_index][info.shader] += (f32)info.timer_count / 1.0e9;
seen_info_test |= (1 << info.shader);
}break;
case ComputeTimingInfoKind_RF_Data:{
- stats->latest_rf_index = (stats->latest_rf_index + 1) % countof(stats->rf_time_deltas);
+ stats->latest_rf_index = (stats->latest_rf_index + 1) % countof(stats->table.rf_time_deltas);
f32 delta = (f32)(info.timer_count - stats->last_rf_timer_count) / 1.0e9;
- stats->rf_time_deltas[stats->latest_rf_index] = delta;
+ stats->table.rf_time_deltas[stats->latest_rf_index] = delta;
stats->last_rf_timer_count = info.timer_count;
- seen_info_test |= (1 << ShaderKind_Count);
+ seen_info_test |= (1 << BeamformerShaderKind_Count);
}break;
}
+ /* NOTE(rnp): do this at the end so that stats table is always in a consistent state */
+ atomic_add_u32(&t->read_index, 1);
}
if (seen_info_test) {
- for EachEnumValue(ShaderKind, shader) {
+ for EachEnumValue(BeamformerShaderKind, shader) {
if (seen_info_test & (1 << shader)) {
f32 sum = 0;
- for EachElement(stats->times, i)
- sum += stats->times[i][shader];
- stats->average_times[shader] = sum / countof(stats->times);
+ for EachElement(stats->table.times, i)
+ sum += stats->table.times[i][shader];
+ stats->average_times[shader] = sum / countof(stats->table.times);
}
}
- if (seen_info_test & (1 << ShaderKind_Count)) {
+ if (seen_info_test & (1 << BeamformerShaderKind_Count)) {
f32 sum = 0;
- for EachElement(stats->rf_time_deltas, i)
- sum += stats->rf_time_deltas[i];
- stats->rf_time_delta_average = sum / countof(stats->rf_time_deltas);
+ for EachElement(stats->table.rf_time_deltas, i)
+ sum += stats->table.rf_time_deltas[i];
+ stats->rf_time_delta_average = sum / countof(stats->table.rf_time_deltas);
}
}
}
@@ -805,58 +824,20 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
BeamformerSharedMemory *sm = ctx->shared_memory.region;
BeamformerParameters *bp = &sm->parameters;
+ b32 averaging = bp->output_points[3] > 1;
if (sm->locks[BeamformerSharedMemoryLockKind_DispatchCompute] && ctx->os.compute_worker.asleep) {
if (sm->start_compute_from_main) {
BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
- ImagePlaneTag tag = ctx->beamform_frames[ctx->display_frame_index].image_plane_tag;
- if (fill_frame_compute_work(ctx, work, tag)) {
+ ImagePlaneTag tag = beamformer_get_newest_frame(ctx, averaging)->image_plane_tag;
+ if (fill_frame_compute_work(ctx, work, tag))
beamform_work_queue_push_commit(ctx->beamform_work_queue);
- if (sm->export_next_frame) {
- BeamformWork *export = beamform_work_queue_push(ctx->beamform_work_queue);
- if (export) {
- /* TODO: we don't really want the beamformer opening/closing files */
- iptr f = ctx->os.open_for_write(ctx->os.export_pipe_name);
- export->type = BW_SAVE_FRAME;
- export->output_frame_ctx.file_handle = f;
- if (bp->output_points[3] > 1) {
- static_assert(countof(ctx->averaged_frames) == 2,
- "fix this, we assume average frame ping pong buffer");
- u32 a_index = !(ctx->averaged_frame_index %
- countof(ctx->averaged_frames));
- BeamformComputeFrame *aframe = ctx->averaged_frames + a_index;
- export->output_frame_ctx.frame = aframe;
- } else {
- export->output_frame_ctx.frame = work->frame;
- }
- beamform_work_queue_push_commit(ctx->beamform_work_queue);
- }
- sm->export_next_frame = 0;
- }
- }
atomic_store_u32(&sm->start_compute_from_main, 0);
}
ctx->os.wake_waiters(&ctx->os.compute_worker.sync_variable);
}
- ComputeFrameIterator cfi = compute_frame_iterator(ctx, ctx->display_frame_index,
- ctx->next_render_frame_index - ctx->display_frame_index);
- for (BeamformComputeFrame *frame = frame_next(&cfi); frame; frame = frame_next(&cfi)) {
- if (frame->in_flight && frame->ready_to_present) {
- frame->in_flight = 0;
- ctx->display_frame_index = frame - cfi.frames;
- }
- }
-
- BeamformComputeFrame *frame_to_draw;
- if (bp->output_points[3] > 1) {
- u32 a_index = !(ctx->averaged_frame_index % countof(ctx->averaged_frames));
- frame_to_draw = ctx->averaged_frames + a_index;
- } else {
- frame_to_draw = ctx->beamform_frames + ctx->display_frame_index;
- }
-
- draw_ui(ctx, input, frame_to_draw->ready_to_present? &frame_to_draw->frame : 0,
- frame_to_draw->image_plane_tag);
+ BeamformComputeFrame *frame = beamformer_get_newest_frame(ctx, averaging);
+ draw_ui(ctx, input, frame->ready_to_present? &frame->frame : 0, frame->image_plane_tag);
ctx->frame_view_render_context.updated = 0;
diff --git a/beamformer.h b/beamformer.h
@@ -77,7 +77,7 @@ typedef struct {
#define CS_SUM_PRESCALE_UNIFORM_LOC 1
typedef struct {
- u32 programs[ComputeShaderKind_Count];
+ u32 programs[BeamformerShaderKind_ComputeCount];
/* NOTE: Decoded data is only relevant in the context of a single frame. We use two
* buffers so that they can be swapped when chaining multiple compute stages */
@@ -110,22 +110,11 @@ typedef enum {
DASShaderKind_Count
} DASShaderKind;
-typedef enum {
- #define X(e, n, s, h, pn) ShaderKind_##e = n,
- COMPUTE_SHADERS
- #undef X
- ShaderKind_Render2D,
- ShaderKind_Count
-} ShaderKind;
-
typedef struct {
- /* NOTE(rnp): this wants to be iterated on both dimensions. it depends entirely on which
- * visualization method you want to use. the coalescing function wants both directions */
- f32 times[32][ShaderKind_Count];
- f32 average_times[ShaderKind_Count];
+ BeamformerComputeStatsTable table;
+ f32 average_times[BeamformerShaderKind_Count];
u64 last_rf_timer_count;
- f32 rf_time_deltas[32];
f32 rf_time_delta_average;
u32 latest_frame_index;
@@ -144,7 +133,7 @@ typedef struct {
u64 timer_count;
ComputeTimingInfoKind kind;
union {
- ShaderKind shader;
+ BeamformerShaderKind shader;
};
} ComputeTimingInfo;
@@ -175,7 +164,6 @@ typedef struct BeamformFrame {
struct BeamformComputeFrame {
BeamformFrame frame;
ImagePlaneTag image_plane_tag;
- b32 in_flight;
b32 ready_to_present;
};
@@ -210,7 +198,6 @@ typedef struct {
BeamformComputeFrame beamform_frames[MAX_BEAMFORMED_SAVED_FRAMES];
u32 next_render_frame_index;
- u32 display_frame_index;
/* NOTE: this will only be used when we are averaging */
u32 averaged_frame_index;
@@ -222,8 +209,6 @@ typedef struct {
* destroying itself on hot-reload */
FrameViewRenderContext frame_view_render_context;
- Arena export_buffer;
-
CudaLib cuda_lib;
OS os;
Stream error_stream;
@@ -244,7 +229,7 @@ struct ShaderReloadContext {
u32 *shader;
ShaderReloadContext *link;
GLenum gl_type;
- ShaderKind kind;
+ BeamformerShaderKind kind;
};
#define BEAMFORMER_FRAME_STEP_FN(name) void name(BeamformerCtx *ctx, Arena *arena, \
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -20,11 +20,21 @@
X(Sum, 8, "sum", 0, "Sum")
typedef enum {
- #define X(e, n, s, h, pn) ComputeShaderKind_##e = n,
+ #define X(e, n, s, h, pn) BeamformerShaderKind_##e = n,
COMPUTE_SHADERS
#undef X
- ComputeShaderKind_Count
-} ComputeShaderKind;
+ BeamformerShaderKind_Render2D,
+ BeamformerShaderKind_Count,
+
+ BeamformerShaderKind_ComputeCount = BeamformerShaderKind_Render2D,
+} BeamformerShaderKind;
+
+typedef struct {
+ /* NOTE(rnp): this wants to be iterated on both dimensions. it depends entirely on which
+ * visualization method you want to use. the coalescing function wants both directions */
+ float times[32][BeamformerShaderKind_Count];
+ float rf_time_deltas[32];
+} BeamformerComputeStatsTable;
/* X(type, id, pretty name) */
#define DECODE_TYPES \
diff --git a/beamformer_work_queue.c b/beamformer_work_queue.c
@@ -50,3 +50,14 @@ DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit)
{
atomic_add_u64(&q->queue, 1);
}
+
+function void
+post_sync_barrier(SharedMemoryRegion *sm, BeamformerSharedMemoryLockKind lock, i32 *locks,
+ os_shared_memory_region_unlock_fn *os_shared_memory_region_unlock)
+{
+ /* NOTE(rnp): debug: here it is not a bug to release the lock if it
+ * isn't held but elsewhere it is */
+ DEBUG_DECL(if (locks[lock])) {
+ os_shared_memory_region_unlock(sm, locks, lock);
+ }
+}
diff --git a/beamformer_work_queue.h b/beamformer_work_queue.h
@@ -2,19 +2,19 @@
#ifndef _BEAMFORMER_WORK_QUEUE_H_
#define _BEAMFORMER_WORK_QUEUE_H_
-#define BEAMFORMER_SHARED_MEMORY_VERSION (6UL)
+#define BEAMFORMER_SHARED_MEMORY_VERSION (7UL)
typedef struct BeamformComputeFrame BeamformComputeFrame;
typedef struct ShaderReloadContext ShaderReloadContext;
typedef enum {
- BW_COMPUTE,
- BW_COMPUTE_INDIRECT,
- BW_RELOAD_SHADER,
- BW_SAVE_FRAME,
- BW_SEND_FRAME,
- BW_UPLOAD_BUFFER,
-} BeamformWorkType;
+ BeamformerWorkKind_Compute,
+ BeamformerWorkKind_ComputeIndirect,
+ BeamformerWorkKind_ReloadShader,
+ BeamformerWorkKind_SendFrame,
+ BeamformerWorkKind_ExportBuffer,
+ BeamformerWorkKind_UploadBuffer,
+} BeamformerWorkKind;
typedef enum {
BU_KIND_CHANNEL_MAPPING,
@@ -26,23 +26,29 @@ typedef enum {
} BeamformerUploadKind;
typedef struct {
+ BeamformerUploadKind kind;
i32 size;
i32 shared_memory_offset;
- BeamformerUploadKind kind;
} BeamformerUploadContext;
+typedef enum {
+ BeamformerExportKind_BeamformedData,
+ BeamformerExportKind_Stats,
+} BeamformerExportKind;
+
typedef struct {
- BeamformComputeFrame *frame;
- iptr file_handle;
-} BeamformOutputFrameContext;
+ BeamformerExportKind kind;
+ i32 size;
+} BeamformerExportContext;
#define BEAMFORMER_SHARED_MEMORY_LOCKS \
X(None) \
- X(Parameters) \
- X(FocalVectors) \
X(ChannelMapping) \
+ X(FocalVectors) \
+ X(Parameters) \
+ X(ScratchSpace) \
X(SparseElements) \
- X(RawData) \
+ X(ExportSync) \
X(DispatchCompute)
#define X(name) BeamformerSharedMemoryLockKind_##name,
@@ -52,16 +58,15 @@ typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Coun
/* NOTE: discriminated union based on type */
typedef struct {
union {
- BeamformComputeFrame *frame;
- BeamformerUploadContext upload_context;
- BeamformOutputFrameContext output_frame_ctx;
- ShaderReloadContext *shader_reload_context;
- ImagePlaneTag compute_indirect_plane;
- void *generic;
+ BeamformComputeFrame *frame;
+ BeamformerUploadContext upload_context;
+ BeamformerExportContext export_context;
+ ShaderReloadContext *shader_reload_context;
+ ImagePlaneTag compute_indirect_plane;
+ void *generic;
};
BeamformerSharedMemoryLockKind lock;
-
- BeamformWorkType type;
+ BeamformerWorkKind kind;
} BeamformWork;
typedef struct {
@@ -79,11 +84,12 @@ typedef BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push_fn);
typedef BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit_fn);
#define BEAMFORMER_SHARED_MEMORY_SIZE (GB(2))
-#define BEAMFORMER_RF_DATA_OFF (sizeof(BeamformerSharedMemory) + 4096ULL \
+#define BEAMFORMER_SCRATCH_OFF (sizeof(BeamformerSharedMemory) + 4096ULL \
- (uintptr_t)(sizeof(BeamformerSharedMemory) & 4095ULL))
-#define BEAMFORMER_MAX_RF_DATA_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - BEAMFORMER_RF_DATA_OFF)
+#define BEAMFORMER_SCRATCH_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - BEAMFORMER_SCRATCH_OFF)
+#define BEAMFORMER_MAX_RF_DATA_SIZE (BEAMFORMER_SCRATCH_SIZE)
-typedef align_as(64) struct {
+typedef struct {
u32 version;
/* NOTE(rnp): causes future library calls to fail.
@@ -113,8 +119,8 @@ typedef align_as(64) struct {
};
};
- ComputeShaderKind compute_stages[MAX_COMPUTE_SHADER_STAGES];
- u32 compute_stages_count;
+ BeamformerShaderKind compute_stages[MAX_COMPUTE_SHADER_STAGES];
+ u32 compute_stages_count;
/* TODO(rnp): hack: we need a different way of dispatching work for export */
b32 start_compute_from_main;
diff --git a/build.c b/build.c
@@ -698,7 +698,7 @@ main(i32 argc, char *argv[])
u64 start_time = os_get_timer_counter();
b32 result = 1;
- Arena arena = os_alloc_arena((Arena){0}, MB(8));
+ Arena arena = os_alloc_arena(MB(8));
check_rebuild_self(arena, argc, argv);
Options options = parse_options(argc, argv);
diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c
@@ -6,8 +6,6 @@
#include "ogl_beamformer_lib_base.h"
#include "../beamformer_work_queue.c"
-#define PIPE_RETRY_PERIOD_MS (100ULL)
-
global SharedMemoryRegion g_shared_memory;
global BeamformerSharedMemory *g_bp;
global BeamformerLibErrorKind g_lib_last_error;
@@ -17,20 +15,7 @@ global BeamformerLibErrorKind g_lib_last_error;
#elif OS_WINDOWS
#include "../os_win32.c"
-#define PIPE_TYPE_BYTE 0x00
-#define PIPE_ACCESS_INBOUND 0x01
-
-#define PIPE_WAIT 0x00
-#define PIPE_NOWAIT 0x01
-
-#define ERROR_NO_DATA 232L
-#define ERROR_PIPE_NOT_CONNECTED 233L
-#define ERROR_PIPE_LISTENING 536L
-
-W32(iptr) CreateNamedPipeA(c8 *, u32, u32, u32, u32, u32, u32, void *);
-W32(b32) DisconnectNamedPipe(iptr);
W32(iptr) OpenFileMappingA(u32, b32, c8 *);
-W32(void) Sleep(u32);
#else
#error Unsupported Platform
@@ -38,41 +23,6 @@ W32(void) Sleep(u32);
#if OS_LINUX
-function Pipe
-os_open_read_pipe(char *name)
-{
- mkfifo(name, 0660);
- return (Pipe){.file = open(name, O_RDONLY|O_NONBLOCK), .name = name};
-}
-
-static void
-os_disconnect_pipe(Pipe p)
-{
-}
-
-static void
-os_close_pipe(iptr *file, char *name)
-{
- if (file) close(*file);
- if (name) unlink(name);
- *file = INVALID_FILE;
-}
-
-static b32
-os_wait_read_pipe(Pipe p, void *buf, iz read_size, u32 timeout_ms)
-{
- struct pollfd pfd = {.fd = p.file, .events = POLLIN};
- iz total_read = 0;
- if (poll(&pfd, 1, timeout_ms) > 0) {
- iz r;
- do {
- r = read(p.file, (u8 *)buf + total_read, read_size - total_read);
- if (r > 0) total_read += r;
- } while (r != 0);
- }
- return total_read == read_size;
-}
-
function SharedMemoryRegion
os_open_shared_memory_area(char *name)
{
@@ -88,54 +38,6 @@ os_open_shared_memory_area(char *name)
#elif OS_WINDOWS
-static Pipe
-os_open_read_pipe(char *name)
-{
- iptr file = CreateNamedPipeA(name, PIPE_ACCESS_INBOUND, PIPE_TYPE_BYTE|PIPE_NOWAIT, 1,
- 0, 1024UL * 1024UL, 0, 0);
- return (Pipe){.file = file, .name = name};
-}
-
-static void
-os_disconnect_pipe(Pipe p)
-{
- DisconnectNamedPipe(p.file);
-}
-
-static void
-os_close_pipe(iptr *file, char *name)
-{
- if (file) CloseHandle(*file);
- *file = INVALID_FILE;
-}
-
-static b32
-os_wait_read_pipe(Pipe p, void *buf, iz read_size, u32 timeout_ms)
-{
- iz elapsed_ms = 0, total_read = 0;
- while (elapsed_ms <= timeout_ms && read_size != total_read) {
- u8 data;
- i32 read;
- b32 result = ReadFile(p.file, &data, 0, &read, 0);
- if (!result) {
- i32 error = GetLastError();
- if (error != ERROR_NO_DATA &&
- error != ERROR_PIPE_LISTENING &&
- error != ERROR_PIPE_NOT_CONNECTED)
- {
- /* NOTE: pipe is in a bad state; we will never read anything */
- break;
- }
- Sleep(PIPE_RETRY_PERIOD_MS);
- elapsed_ms += PIPE_RETRY_PERIOD_MS;
- } else {
- ReadFile(p.file, (u8 *)buf + total_read, read_size - total_read, &read, 0);
- total_read += read;
- }
- }
- return total_read == read_size;
-}
-
function SharedMemoryRegion
os_open_shared_memory_area(char *name)
{
@@ -216,6 +118,18 @@ lib_release_lock(BeamformerSharedMemoryLockKind lock)
os_shared_memory_region_unlock(&g_shared_memory, g_bp->locks, (i32)lock);
}
+function b32
+try_wait_sync(BeamformerSharedMemoryLockKind lock, i32 timeout_ms)
+{
+ b32 result = 0;
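+ /* NOTE: the first lock arms the sync; the second blocks until the
+  * beamformer posts (unlocks) it */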
+ if (lib_try_lock(lock, 0) && lib_try_lock(lock, timeout_ms)) {
+ /* TODO(rnp): non-critical race condition */
+ lib_release_lock(lock);
+ result = 1;
+ }
+ return result;
+}
+
u32
beamformer_get_api_version(void)
{
@@ -251,7 +165,7 @@ set_beamformer_pipeline(i32 *stages, i32 stages_count)
if (check_shared_memory()) {
g_bp->compute_stages_count = 0;
for (i32 i = 0; i < stages_count; i++) {
- if (BETWEEN(stages[i], 0, ComputeShaderKind_Count)) {
+ if (BETWEEN(stages[i], 0, BeamformerShaderKind_ComputeCount)) {
g_bp->compute_stages[g_bp->compute_stages_count++] = stages[i];
}
}
@@ -270,16 +184,8 @@ set_beamformer_pipeline(i32 *stages, i32 stages_count)
b32
beamformer_start_compute(i32 timeout_ms)
{
- b32 result = 0;
- if (check_shared_memory()) {
- if (lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, 0)) {
- if (lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, timeout_ms)) {
- /* TODO(rnp): non-critical race condition */
- lib_release_lock(BeamformerSharedMemoryLockKind_DispatchCompute);
- result = 1;
- }
- }
- }
+ b32 result = check_shared_memory() &&
+ try_wait_sync(BeamformerSharedMemoryLockKind_DispatchCompute, timeout_ms);
return result;
}
@@ -293,7 +199,7 @@ beamformer_upload_buffer(void *data, u32 size, i32 store_offset, BeamformerUploa
result = work && lib_try_lock(lock, timeout_ms);
if (result) {
work->upload_context = upload_context;
- work->type = BW_UPLOAD_BUFFER;
+ work->kind = BeamformerWorkKind_UploadBuffer;
work->lock = lock;
mem_copy((u8 *)g_bp + store_offset, data, size);
if ((atomic_load_u32(&g_bp->dirty_regions) & (1 << (lock - 1))) == 0) {
@@ -335,11 +241,11 @@ beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, b32 start_f
b32 result = 0;
if (data_size <= BEAMFORMER_MAX_RF_DATA_SIZE) {
BeamformerUploadContext uc = {0};
- uc.shared_memory_offset = BEAMFORMER_RF_DATA_OFF;
+ uc.shared_memory_offset = BEAMFORMER_SCRATCH_OFF;
uc.size = data_size;
uc.kind = BU_KIND_RF_DATA;
result = beamformer_upload_buffer(data, data_size, uc.shared_memory_offset, uc,
- BeamformerSharedMemoryLockKind_RawData, timeout_ms);
+ BeamformerSharedMemoryLockKind_ScratchSpace, timeout_ms);
if (result && start_from_main) atomic_store_u32(&g_bp->start_compute_from_main, 1);
} else {
g_lib_last_error = BF_LIB_ERR_KIND_BUFFER_OVERFLOW;
@@ -363,7 +269,7 @@ beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag
BeamformWork *work = try_push_work_queue();
result = work != 0;
if (result) {
- work->type = BW_COMPUTE_INDIRECT;
+ work->kind = BeamformerWorkKind_ComputeIndirect;
work->compute_indirect_plane = image_plane_tag;
beamform_work_queue_push_commit(&g_bp->external_work_queue);
}
@@ -438,6 +344,20 @@ send_data(void *data, u32 data_size)
return result;
}
+function b32
+beamformer_export_buffer(BeamformerExportContext export_context)
+{
+ BeamformWork *work = try_push_work_queue();
+ b32 result = work != 0;
+ if (result) {
+ work->export_context = export_context;
+ work->kind = BeamformerWorkKind_ExportBuffer;
+ work->lock = BeamformerSharedMemoryLockKind_ScratchSpace;
+ beamform_work_queue_push_commit(&g_bp->external_work_queue);
+ }
+ return result;
+}
+
b32
beamform_data_synchronized(void *data, u32 data_size, u32 output_points[3], f32 *out_data, i32 timeout_ms)
{
@@ -450,21 +370,46 @@ beamform_data_synchronized(void *data, u32 data_size, u32 output_points[3], f32
g_bp->parameters.output_points[0] = output_points[0];
g_bp->parameters.output_points[1] = output_points[1];
g_bp->parameters.output_points[2] = output_points[2];
- g_bp->export_next_frame = 1;
-
- Pipe export_pipe = os_open_read_pipe(OS_EXPORT_PIPE_NAME);
- if (export_pipe.file != INVALID_FILE) {
- if (send_data(data, data_size)) {
- iz output_size = output_points[0] * output_points[1] *
- output_points[2] * sizeof(f32) * 2;
- result = os_wait_read_pipe(export_pipe, out_data, output_size, timeout_ms);
- if (!result) g_lib_last_error = BF_LIB_ERR_KIND_READ_EXPORT_PIPE;
- }
- os_disconnect_pipe(export_pipe);
- os_close_pipe(&export_pipe.file, export_pipe.name);
+ iz output_size = output_points[0] * output_points[1] * output_points[2] * sizeof(f32) * 2;
+ if (output_size <= BEAMFORMER_SCRATCH_SIZE &&
+ beamformer_push_data_with_compute(data, data_size, 0, 0))
+ {
+ BeamformerExportContext export;
+ export.kind = BeamformerExportKind_BeamformedData;
+ export.size = output_size;
+ if (beamformer_export_buffer(export) &&
+ lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, 0))
+ {
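+ /* wait for the beamformer to post ExportSync, then copy the
+  * beamformed frame out of the scratch region */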
+ if (try_wait_sync(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
+ mem_copy(out_data, (u8 *)g_bp + BEAMFORMER_SCRATCH_OFF, output_size);
+ result = 1;
+ }
+ }
} else {
- g_lib_last_error = BF_LIB_ERR_KIND_OPEN_EXPORT_PIPE;
+ g_lib_last_error = BF_LIB_ERR_KIND_EXPORT_SPACE_OVERFLOW;
+ }
+ }
+ return result;
+}
+
+b32
+beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms)
+{
+ b32 result = 0;
+ if (check_shared_memory()) {
+ static_assert(sizeof(*output) <= BEAMFORMER_SCRATCH_SIZE, "timing table size exceeds scratch space");
+ BeamformerExportContext export;
+ export.kind = BeamformerExportKind_Stats;
+ export.size = sizeof(*output);
+
+ if (beamformer_export_buffer(export) &&
+ lib_try_lock(BeamformerSharedMemoryLockKind_DispatchCompute, 0))
+ {
+ if (try_wait_sync(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) {
+ mem_copy(output, (u8 *)g_bp + BEAMFORMER_SCRATCH_OFF, sizeof(*output));
+ result = 1;
+ }
}
}
return result;
diff --git a/helpers/ogl_beamformer_lib_base.h b/helpers/ogl_beamformer_lib_base.h
@@ -16,10 +16,9 @@
X(INVALID_IMAGE_PLANE, 5, "invalid image plane") \
X(BUFFER_OVERFLOW, 6, "passed buffer size exceeds available space") \
X(WORK_QUEUE_FULL, 7, "work queue full") \
- X(OPEN_EXPORT_PIPE, 8, "failed to open export pipe") \
- X(READ_EXPORT_PIPE, 9, "failed to read full export data from pipe") \
- X(SHARED_MEMORY, 10, "failed to open shared memory region") \
- X(SYNC_VARIABLE, 11, "failed to acquire lock within timeout period")
+ X(EXPORT_SPACE_OVERFLOW, 8, "not enough space for data export") \
+ X(SHARED_MEMORY, 9, "failed to open shared memory region") \
+ X(SYNC_VARIABLE, 10, "failed to acquire lock within timeout period")
#define X(type, num, string) BF_LIB_ERR_KIND_ ##type = num,
typedef enum {BEAMFORMER_LIB_ERRORS} BeamformerLibErrorKind;
@@ -41,6 +40,9 @@ LIB_FN uint32_t send_data(void *data, uint32_t data_size);
LIB_FN uint32_t beamform_data_synchronized(void *data, uint32_t data_size, uint32_t output_points[3],
float *out_data, int32_t timeout_ms);
+/* NOTE: downloads the last 32 frames worth of compute timings into output */
+LIB_FN uint32_t beamformer_compute_timings(BeamformerComputeStatsTable *output, int32_t timeout_ms);
+
/* NOTE: tells the beamformer to start beamforming and waits until it starts or for timeout_ms */
LIB_FN uint32_t beamformer_start_compute(int32_t timeout_ms);
diff --git a/main_linux.c b/main_linux.c
@@ -75,7 +75,7 @@ main(void)
{
BeamformerCtx ctx = {0};
BeamformerInput input = {.executable_reloaded = 1};
- Arena temp_memory = os_alloc_arena((Arena){0}, MB(16));
+ Arena temp_memory = os_alloc_arena(MB(16));
ctx.error_stream = stream_alloc(&temp_memory, MB(1));
ctx.ui_backing_store = sub_arena(&temp_memory, MB(2), KB(4));
@@ -88,7 +88,6 @@ main(void)
ctx.os.file_watch_context.handle = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
ctx.os.compute_worker.asleep = 1;
ctx.os.error_handle = STDERR_FILENO;
- ctx.os.export_pipe_name = OS_EXPORT_PIPE_NAME;
setup_beamformer(&ctx, &input, &temp_memory);
os_wake_waiters(&ctx.os.compute_worker.sync_variable);
diff --git a/main_w32.c b/main_w32.c
@@ -104,7 +104,7 @@ main(void)
{
BeamformerCtx ctx = {0};
BeamformerInput input = {.executable_reloaded = 1};
- Arena temp_memory = os_alloc_arena((Arena){0}, MB(16));
+ Arena temp_memory = os_alloc_arena(MB(16));
ctx.error_stream = stream_alloc(&temp_memory, MB(1));
ctx.ui_backing_store = sub_arena(&temp_memory, MB(2), KB(4));
@@ -121,7 +121,6 @@ main(void)
ctx.os.context = (iptr)&w32_ctx;
ctx.os.compute_worker.asleep = 1;
ctx.os.error_handle = GetStdHandle(STD_ERROR_HANDLE);
- ctx.os.export_pipe_name = OS_EXPORT_PIPE_NAME;
setup_beamformer(&ctx, &input, &temp_memory);
os_wake_waiters(&ctx.os.compute_worker.sync_variable);
diff --git a/os_linux.c b/os_linux.c
@@ -4,7 +4,6 @@
* be provided by any platform the beamformer is ported to. */
#define OS_SHARED_MEMORY_NAME "/ogl_beamformer_shared_memory"
-#define OS_EXPORT_PIPE_NAME "/tmp/beamformer_output_pipe"
#define OS_PATH_SEPARATOR_CHAR '/'
#define OS_PATH_SEPARATOR "/"
@@ -92,29 +91,12 @@ os_round_up_to_page_size(iz value)
function OS_ALLOC_ARENA_FN(os_alloc_arena)
{
- Arena result = old;
- capacity = os_round_up_to_page_size(capacity);
- iz old_size = old.end - old.beg;
- if (old_size < capacity) {
- if (old.beg) munmap(old.beg, old_size);
- result.beg = mmap(0, capacity, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (result.beg == MAP_FAILED)
- os_fatal(s8("os_alloc_arena: couldn't allocate memory\n"));
- result.end = result.beg + capacity;
- }
- return result;
-}
-
-function OS_CLOSE_FN(os_close)
-{
- close(file);
-}
-
-function OS_OPEN_FOR_WRITE_FN(os_open_for_write)
-{
- iptr result = open(fname, O_WRONLY|O_TRUNC);
- if (result == -1)
- result = INVALID_FILE;
+ Arena result = {0};
+ capacity = os_round_up_to_page_size(capacity);
+ result.beg = mmap(0, capacity, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (result.beg == MAP_FAILED)
+ os_fatal(s8("os_alloc_arena: couldn't allocate memory\n"));
+ result.end = result.beg + capacity;
return result;
}
@@ -153,17 +135,6 @@ os_file_exists(char *path)
return result;
}
-function OS_READ_FILE_FN(os_read_file)
-{
- iz r = 0, total_read = 0;
- do {
- if (r != -1)
- total_read += r;
- r = read(file, buf + total_read, size - total_read);
- } while (r);
- return total_read;
-}
-
function SharedMemoryRegion
os_create_shared_memory_area(Arena *arena, char *name, i32 lock_count, iz requested_capacity)
{
diff --git a/os_win32.c b/os_win32.c
@@ -1,7 +1,6 @@
/* See LICENSE for license details. */
#define OS_SHARED_MEMORY_NAME "Local\\ogl_beamformer_parameters"
-#define OS_EXPORT_PIPE_NAME "\\\\.\\pipe\\beamformer_output_pipe"
#define OS_PATH_SEPARATOR_CHAR '\\'
#define OS_PATH_SEPARATOR "\\"
@@ -15,7 +14,6 @@
#define PAGE_READWRITE 0x04
#define MEM_COMMIT 0x1000
#define MEM_RESERVE 0x2000
-#define MEM_RELEASE 0x8000
#define GENERIC_WRITE 0x40000000
#define GENERIC_READ 0x80000000
@@ -120,7 +118,6 @@ W32(i32) WakeByAddressAll(void *);
W32(iptr) wglGetProcAddress(c8 *);
W32(b32) WriteFile(iptr, u8 *, i32, i32 *, void *);
W32(void *) VirtualAlloc(u8 *, iz, u32, u32);
-W32(b32) VirtualFree(u8 *, iz, u32);
#ifdef _DEBUG
function void *
@@ -197,27 +194,12 @@ os_round_up_to_page_size(iz value)
function OS_ALLOC_ARENA_FN(os_alloc_arena)
{
- Arena result = old;
- capacity = os_round_up_to_page_size(capacity);
- iz old_size = old.end - old.beg;
- if (old_size < capacity) {
- if (old.beg) VirtualFree(old.beg, old_size, MEM_RELEASE);
- result.beg = VirtualAlloc(0, capacity, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
- if (!result.beg)
- os_fatal(s8("os_alloc_arena: couldn't allocate memory\n"));
- result.end = result.beg + capacity;
- }
- return result;
-}
-
-function OS_CLOSE_FN(os_close)
-{
- CloseHandle(file);
-}
-
-function OS_OPEN_FOR_WRITE_FN(os_open_for_write)
-{
- iptr result = CreateFileA(fname, GENERIC_WRITE, 0, 0, OPEN_EXISTING, 0, 0);
+ Arena result = {0};
+ capacity = os_round_up_to_page_size(capacity);
+ result.beg = VirtualAlloc(0, capacity, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ if (!result.beg)
+ os_fatal(s8("os_alloc_arena: couldn't allocate memory\n"));
+ result.end = result.beg + capacity;
return result;
}
@@ -243,13 +225,6 @@ function OS_READ_WHOLE_FILE_FN(os_read_whole_file)
return result;
}
-function OS_READ_FILE_FN(os_read_file)
-{
- i32 total_read = 0;
- ReadFile(file, buf, size, &total_read, 0);
- return total_read;
-}
-
function OS_WRITE_NEW_FILE_FN(os_write_new_file)
{
b32 result = 0;
diff --git a/static.c b/static.c
@@ -180,7 +180,7 @@ function FILE_WATCH_CALLBACK_FN(reload_shader_indirect)
BeamformerCtx *ctx = src->beamformer_context;
BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
if (work) {
- work->type = BW_RELOAD_SHADER;
+ work->kind = BeamformerWorkKind_ReloadShader;
work->shader_reload_context = src;
beamform_work_queue_push_commit(ctx->beamform_work_queue);
os_wake_waiters(&os->compute_worker.sync_variable);
@@ -291,8 +291,8 @@ setup_beamformer(BeamformerCtx *ctx, BeamformerInput *input, Arena *memory)
sm->version = BEAMFORMER_SHARED_MEMORY_VERSION;
/* NOTE: default compute shader pipeline */
- sm->compute_stages[0] = ComputeShaderKind_Decode;
- sm->compute_stages[1] = ComputeShaderKind_DASCompute;
+ sm->compute_stages[0] = BeamformerShaderKind_Decode;
+ sm->compute_stages[1] = BeamformerShaderKind_DASCompute;
sm->compute_stages_count = 2;
if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA
@@ -325,15 +325,16 @@ setup_beamformer(BeamformerCtx *ctx, BeamformerInput *input, Arena *memory)
);
#undef X
+ ComputeShaderCtx *cs = &ctx->csctx;
#define X(e, sn, f, nh, pretty_name) do if (s8(f).len > 0) { \
ShaderReloadContext *src = push_struct(memory, typeof(*src)); \
src->beamformer_context = ctx; \
if (nh) src->header = compute_parameters_header; \
src->path = s8(static_path_join("shaders", f ".glsl")); \
src->name = src->path; \
- src->shader = ctx->csctx.programs + ShaderKind_##e; \
+ src->shader = cs->programs + BeamformerShaderKind_##e; \
src->gl_type = GL_COMPUTE_SHADER; \
- src->kind = ShaderKind_##e; \
+ src->kind = BeamformerShaderKind_##e; \
src->link = src; \
os_add_file_watch(&ctx->os, memory, src->path, reload_shader_indirect, (iptr)src); \
reload_shader_indirect(&ctx->os, src->path, (iptr)src, *memory); \
@@ -372,7 +373,7 @@ setup_beamformer(BeamformerCtx *ctx, BeamformerInput *input, Arena *memory)
render_2d->path = s8(static_path_join("shaders", "render_2d.frag.glsl"));
render_2d->name = s8("shaders/render_2d.glsl");
render_2d->gl_type = GL_FRAGMENT_SHADER;
- render_2d->kind = ShaderKind_Render2D;
+ render_2d->kind = BeamformerShaderKind_Render2D;
render_2d->shader = &fvr->shader;
render_2d->header = s8(""
"layout(location = 0) in vec2 texture_coordinate;\n"
diff --git a/tests/throughput.c b/tests/throughput.c
@@ -2,6 +2,7 @@
/* TODO(rnp):
* [ ]: for finer grained evaluation of throughput latency just queue a data upload
* without replacing the data.
+ * [ ]: bug: we aren't inserting rf data between each frame
*/
#define LIB_FN function
@@ -371,9 +372,9 @@ execute_study(s8 study, Arena arena, Stream path, Options *options)
i32 shader_stages[16];
i32 shader_stage_count = 0;
- if (options->cuda) shader_stages[shader_stage_count++] = ComputeShaderKind_CudaDecode;
- else shader_stages[shader_stage_count++] = ComputeShaderKind_Decode;
- shader_stages[shader_stage_count++] = ComputeShaderKind_DASCompute;
+ if (options->cuda) shader_stages[shader_stage_count++] = BeamformerShaderKind_CudaDecode;
+ else shader_stages[shader_stage_count++] = BeamformerShaderKind_Decode;
+ shader_stages[shader_stage_count++] = BeamformerShaderKind_DASCompute;
set_beamformer_pipeline(shader_stages, shader_stage_count);
@@ -428,7 +429,7 @@ main(i32 argc, char *argv[])
signal(SIGINT, sigint);
- Arena arena = os_alloc_arena((Arena){0}, KB(8));
+ Arena arena = os_alloc_arena(KB(8));
Stream path = stream_alloc(&arena, KB(4));
stream_append_s8(&path, c_str_to_s8(options.remaining[0]));
stream_ensure_termination(&path, OS_PATH_SEPARATOR_CHAR);
diff --git a/ui.c b/ui.c
@@ -2032,8 +2032,8 @@ draw_compute_progress_bar(BeamformerUI *ui, Arena arena, ComputeProgressBar *sta
function v2
draw_compute_stats_view(BeamformerCtx *ctx, Arena arena, Rect r)
{
- #define X(e, n, s, h, pn) [ComputeShaderKind_##e] = s8_comp(pn ":"),
- read_only local_persist s8 labels[ComputeShaderKind_Count] = {COMPUTE_SHADERS};
+ #define X(e, n, s, h, pn) [BeamformerShaderKind_##e] = s8_comp(pn ":"),
+ read_only local_persist s8 labels[BeamformerShaderKind_ComputeCount] = {COMPUTE_SHADERS};
#undef X
BeamformerSharedMemory *sm = ctx->shared_memory.region;
@@ -2043,14 +2043,14 @@ draw_compute_stats_view(BeamformerCtx *ctx, Arena arena, Rect r)
u32 stages = sm->compute_stages_count;
TextSpec text_spec = {.font = &ui->font, .colour = FG_COLOUR, .flags = TF_LIMITED};
- static_assert(ShaderKind_Count <= 32, "shader kind bitfield test");
+ static_assert(countof(labels) <= 32, "shader kind bitfield test");
u32 seen_shaders = 0;
Table *table = table_new(&arena, stages + 2, TextAlignment_Left, TextAlignment_Left, TextAlignment_Left);
for (u32 i = 0; i < stages; i++) {
TableCell *cells = table_push_row(table, &arena, TRK_CELLS)->data;
Stream sb = arena_stream(arena);
- ShaderKind index = (ShaderKind)sm->compute_stages[i];
+ BeamformerShaderKind index = sm->compute_stages[i];
if ((seen_shaders & (1 << index)) == 0) {
compute_time_sum += stats->average_times[index];
stream_append_f64_e(&sb, stats->average_times[index]);
@@ -3019,8 +3019,7 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw
b32 dispatch = ctx->os.shared_memory_region_lock(&ctx->shared_memory, sm->locks,
BeamformerSharedMemoryLockKind_DispatchCompute,
0);
- sm->start_compute_from_main |= dispatch &
- ctx->beamform_frames[ctx->display_frame_index].ready_to_present;
+ sm->start_compute_from_main |= dispatch & beamformer_get_newest_frame(ctx, 0)->ready_to_present;
ctx->os.shared_memory_region_unlock(&ctx->shared_memory, sm->locks, lock);
}
}
diff --git a/util.h b/util.h
@@ -43,6 +43,7 @@
#define INVALID_CODE_PATH ASSERT(0)
#define INVALID_DEFAULT_CASE default: ASSERT(0); break
+#define InvalidCodePath assert(0)
#define InvalidDefaultCase default: assert(0); break
#define arg_list(type, ...) (type []){__VA_ARGS__}, sizeof((type []){__VA_ARGS__}) / sizeof(type)
@@ -207,18 +208,14 @@ typedef union {
.size = {.x = -F32_INFINITY, .y = -F32_INFINITY}}
typedef struct {
- iptr file;
- char *name;
-} Pipe;
-#define INVALID_FILE (-1)
-
-typedef struct {
u8 *data;
u32 widx;
u32 cap;
b32 errors;
} Stream;
+#define INVALID_FILE (-1)
+
typedef struct OS OS;
typedef struct {
@@ -263,7 +260,7 @@ typedef struct {
iptr os_context;
} SharedMemoryRegion;
-#define OS_ALLOC_ARENA_FN(name) Arena name(Arena old, iz capacity)
+#define OS_ALLOC_ARENA_FN(name) Arena name(iz capacity)
typedef OS_ALLOC_ARENA_FN(os_alloc_arena_fn);
#define OS_ADD_FILE_WATCH_FN(name) void name(OS *os, Arena *a, s8 path, \
@@ -273,18 +270,9 @@ typedef OS_ADD_FILE_WATCH_FN(os_add_file_watch_fn);
#define OS_WAKE_WORKER_FN(name) void name(GLWorkerThreadContext *ctx)
typedef OS_WAKE_WORKER_FN(os_wake_worker_fn);
-#define OS_CLOSE_FN(name) void name(iptr file)
-typedef OS_CLOSE_FN(os_close_fn);
-
-#define OS_OPEN_FOR_WRITE_FN(name) iptr name(c8 *fname)
-typedef OS_OPEN_FOR_WRITE_FN(os_open_for_write_fn);
-
#define OS_READ_WHOLE_FILE_FN(name) s8 name(Arena *arena, char *file)
typedef OS_READ_WHOLE_FILE_FN(os_read_whole_file_fn);
-#define OS_READ_FILE_FN(name) iz name(iptr file, void *buf, iz size)
-typedef OS_READ_FILE_FN(os_read_file_fn);
-
#define OS_WAIT_ON_VALUE_FN(name) b32 name(i32 *value, i32 current, u32 timeout_ms)
typedef OS_WAIT_ON_VALUE_FN(os_wait_on_value_fn);
@@ -308,10 +296,6 @@ typedef OS_SHARED_MEMORY_UNLOCK_REGION_FN(os_shared_memory_region_unlock_fn);
#define OS_FNS \
X(add_file_watch) \
- X(alloc_arena) \
- X(close) \
- X(open_for_write) \
- X(read_file) \
X(read_whole_file) \
X(shared_memory_region_lock) \
X(shared_memory_region_unlock) \
@@ -342,8 +326,6 @@ struct OS {
iptr error_handle;
GLWorkerThreadContext compute_worker;
- char *export_pipe_name;
-
DEBUG_DECL(renderdoc_start_frame_capture_fn *start_frame_capture;)
DEBUG_DECL(renderdoc_end_frame_capture_fn *end_frame_capture;)
};