Commit: 1545fc74e9b5d723d8087d76f48d93a8f4bf7f96
Parent: 6cccfe00b1445c336730fd0869ad616bdee2e254
Author: Randy Palamar
Date: Mon, 24 Feb 2025 16:03:22 -0700
asynchronous compute
Diffstat:
15 files changed, 901 insertions(+), 720 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -23,19 +23,32 @@ make_valid_test_dim(uv3 in)
}
+/* NOTE: builds an iterator over the ctx->beamform_frames ring buffer.
+ * start_index: slot the iteration is anchored at; a negative value means
+ * "anchor at stop_index and visit every slot".
+ * stop_index: a negative value means "visit every slot starting at start_index".
+ * When both are non-negative the iterator yields (stop_index - start_index) frames,
+ * wrapped to the ring capacity. At least one of the two must be non-negative. */
static BeamformFrameIterator
-beamform_frame_iterator(BeamformerCtx *ctx)
+beamform_frame_iterator(BeamformerCtx *ctx, i32 start_index, i32 stop_index)
{
+ /* NOTE: keep the capacity signed; comparing a negative sentinel index against an
+ * unsigned element count would convert it to a huge value and trip the range checks */
+ i32 capacity = (i32)ARRAY_COUNT(ctx->beamform_frames);
+ ASSERT(start_index < capacity);
+ ASSERT(stop_index < capacity);
+ ASSERT(stop_index >= 0 || start_index >= 0);
+
+ u32 needed_frames;
+ if (stop_index < 0 || start_index < 0)
+ needed_frames = capacity;
+ else
+ /* NOTE: bias by capacity before reducing so the wrap is correct for any
+ * capacity, not only powers of two */
+ needed_frames = (u32)(stop_index - start_index + capacity) % (u32)capacity;
+
+ if (start_index < 0)
+ start_index = stop_index;
+
BeamformFrameIterator result;
result.frames = ctx->beamform_frames;
- result.offset = ctx->displayed_frame_index;
+ result.offset = start_index;
result.capacity = ARRAY_COUNT(ctx->beamform_frames);
result.cursor = 0;
- result.needed_frames = ORONE(ctx->params->raw.output_points.w);
+ result.needed_frames = needed_frames;
return result;
}
static BeamformFrame *
-frame_next(BeamformFrameIterator *bfi)
+frame_next_backwards(BeamformFrameIterator *bfi)
{
BeamformFrame *result = 0;
if (bfi->cursor != bfi->needed_frames) {
@@ -45,11 +58,20 @@ frame_next(BeamformFrameIterator *bfi)
return result;
}
+/* NOTE: yields the next frame walking the ring in increasing slot order starting
+ * at bfi->offset; returns 0 once bfi->needed_frames frames have been produced */
+static BeamformFrame *
+frame_next_forwards(BeamformFrameIterator *bfi)
+{
+ if (bfi->cursor == bfi->needed_frames)
+ return 0;
+
+ u32 slot = (bfi->offset + bfi->cursor) % bfi->capacity;
+ bfi->cursor++;
+ return bfi->frames + slot;
+}
+
static void
alloc_beamform_frame(GLParams *gp, BeamformFrame *out, uv3 out_dim, u32 frame_index, s8 name)
{
- glDeleteTextures(1, &out->texture);
-
out->dim.x = CLAMP(round_down_power_of_2(ORONE(out_dim.x)), 1, gp->max_3d_texture_dim);
out->dim.y = CLAMP(round_down_power_of_2(ORONE(out_dim.y)), 1, gp->max_3d_texture_dim);
out->dim.z = CLAMP(round_down_power_of_2(ORONE(out_dim.z)), 1, gp->max_3d_texture_dim);
@@ -67,9 +89,13 @@ alloc_beamform_frame(GLParams *gp, BeamformFrame *out, uv3 out_dim, u32 frame_in
stream_append_u64(&label, frame_index);
stream_append_s8(&label, s8("]"));
+ glDeleteTextures(1, &out->texture);
glCreateTextures(GL_TEXTURE_3D, 1, &out->texture);
glTextureStorage3D(out->texture, out->mips, GL_RG32F, out->dim.x, out->dim.y, out->dim.z);
LABEL_GL_OBJECT(GL_TEXTURE, out->texture, stream_to_s8(&label));
+
+ glDeleteQueries(ARRAY_COUNT(out->timer_ids), out->timer_ids);
+ glCreateQueries(GL_TIME_ELAPSED, ARRAY_COUNT(out->timer_ids), out->timer_ids);
}
static void
@@ -82,8 +108,13 @@ alloc_output_image(BeamformerCtx *ctx, uv3 output_dim)
uv3 odim = ctx->averaged_frame.dim;
UnloadRenderTexture(ctx->fsctx.output);
+ /* TODO(rnp): sometimes when accepting data on w32 something happens
+ * and the program will stall in vprintf in TraceLog(...) here.
+ * for now do this to avoid the problem */
+ SetTraceLogLevel(LOG_NONE);
/* TODO: select odim.x vs odim.y */
ctx->fsctx.output = LoadRenderTexture(odim.x, odim.z);
+ SetTraceLogLevel(LOG_INFO);
LABEL_GL_OBJECT(GL_FRAMEBUFFER, ctx->fsctx.output.id, s8("Rendered_View"));
GenTextureMipmaps(&ctx->fsctx.output.texture);
//SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_ANISOTROPIC_8X);
@@ -181,110 +212,53 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
+/* NOTE: returns the work item at the read index without consuming it, or 0 when
+ * the read and write indices are equal (queue empty). This function does not modify
+ * q->queue; beamform_work_queue_pop_commit() is what advances the read index. */
static BeamformWork *
beamform_work_queue_pop(BeamformWorkQueue *q)
{
- BeamformWork *result = q->first;
- if (result) {
- switch (result->type) {
- case BW_FULL_COMPUTE:
- case BW_RECOMPUTE:
- case BW_PARTIAL_COMPUTE:
- /* NOTE: only one compute is allowed per frame */
- if (q->did_compute_this_frame) {
- result = 0;
- } else {
- q->compute_in_flight--;
- q->did_compute_this_frame = 1;
- ASSERT(q->compute_in_flight >= 0);
- }
- break;
- }
- }
- /* NOTE: only do this once we have determined if we are doing the work */
- if (result) {
- q->first = result->next;
- if (result == q->last) {
- ASSERT(result->next == 0);
- q->last = 0;
- }
- }
+ BeamformWork *result = 0;
+
+ static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2");
+ /* NOTE: q->queue packs both ring indices into one 64 bit word: the write index
+ * is read from the low bits and the read index from the bits starting at bit 32.
+ * Both are reduced with the power-of-2 capacity mask. */
+ u64 val = atomic_load(&q->queue);
+ u64 mask = ARRAY_COUNT(q->work_items) - 1;
+ u32 widx = val & mask;
+ u32 ridx = val >> 32 & mask;
+
+ if (ridx != widx)
+ result = q->work_items + ridx;
return result;
}
-static BeamformWork *
-beamform_work_queue_push(BeamformerCtx *ctx, Arena *a, enum beamform_work work_type)
+/* NOTE: consumes the item previously returned by beamform_work_queue_pop() by
+ * advancing the read index by one */
+static void
+beamform_work_queue_pop_commit(BeamformWorkQueue *q)
{
- /* TODO: we should have a sub arena specifically for this purpose */
+ /* NOTE: the read index is stored starting at bit 32 of q->queue, hence 1 << 32 */
+ atomic_add(&q->queue, 0x100000000ULL);
+}
- BeamformWorkQueue *q = &ctx->beamform_work_queue;
- ComputeShaderCtx *cs = &ctx->csctx;
+/* NOTE: reserves the slot at the write index and returns it zeroed, or returns 0
+ * when advancing the write index would collide with the read index (queue full).
+ * The slot only becomes visible to the consumer after
+ * beamform_work_queue_push_commit() advances the write index. */
+DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push)
+{
+ BeamformWork *result = 0;
- BeamformWork *result = q->next_free;
- if (result) q->next_free = result->next;
- else result = alloc(a, typeof(*result), 1);
-
- if (result) {
- result->type = work_type;
- result->next = 0;
-
- switch (work_type) {
- case BW_FULL_COMPUTE:
- if (q->compute_in_flight >= ARRAY_COUNT(cs->raw_data_fences)) {
- result->next = q->next_free;
- q->next_free = result;
- result = 0;
- break;
- }
- cs->raw_data_index++;
- if (cs->raw_data_index >= ARRAY_COUNT(cs->raw_data_fences))
- cs->raw_data_index = 0;
- /* FALLTHROUGH */
- case BW_RECOMPUTE: {
- i32 raw_index = cs->raw_data_index;
- result->compute_ctx.raw_data_ssbo_index = raw_index;
- /* NOTE: if this times out it means the command queue is more than 3
- * frames behind. In that case we need to re-evaluate the buffer size */
- if (cs->raw_data_fences[raw_index]) {
- i32 result = glClientWaitSync(cs->raw_data_fences[raw_index], 0,
- 10000);
- if (result == GL_TIMEOUT_EXPIRED) {
- //ASSERT(0);
- }
- glDeleteSync(cs->raw_data_fences[raw_index]);
- cs->raw_data_fences[raw_index] = NULL;
- }
- ctx->displayed_frame_index++;
- if (ctx->displayed_frame_index >= ARRAY_COUNT(ctx->beamform_frames))
- ctx->displayed_frame_index = 0;
- result->compute_ctx.frame = ctx->beamform_frames + ctx->displayed_frame_index;
- result->compute_ctx.first_pass = 1;
-
- BeamformFrameIterator bfi = beamform_frame_iterator(ctx);
- for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi)) {
- uv3 try_dim = ctx->params->raw.output_points.xyz;
- if (!uv3_equal(frame->dim, try_dim)) {
- u32 index = (bfi.offset - bfi.cursor) % bfi.capacity;
- alloc_beamform_frame(&ctx->gl, frame, try_dim, index,
- s8("Beamformed_Data"));
- }
- }
- } /* FALLTHROUGH */
- case BW_PARTIAL_COMPUTE:
- q->compute_in_flight++;
- case BW_SAVE_FRAME:
- case BW_SEND_FRAME:
- case BW_SSBO_COPY:
- break;
- }
+ static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2");
+ /* NOTE: write index is read from the low bits of q->queue, read index from the
+ * bits starting at bit 32 */
+ u64 val = atomic_load(&q->queue);
+ u64 mask = ARRAY_COUNT(q->work_items) - 1;
+ u32 widx = val & mask;
+ u32 ridx = val >> 32 & mask;
+ u32 next = (widx + 1) & mask;
- if (result) {
- if (q->last) q->last = q->last->next = result;
- else q->last = q->first = result;
- }
+ /* NOTE: clear bit 31 of the write counter before it can carry into the read
+ * index. The mask must be a 64 bit constant: ~0x80000000 is a 32 bit unsigned
+ * value which zero extends to 0x000000007FFFFFFF and would wipe the read index. */
+ if (val & 0x80000000ULL)
+ atomic_and(&q->queue, ~0x80000000ULL);
+
+ if (next != ridx) {
+ result = q->work_items + widx;
+ zero_struct(result);
}
return result;
}
+/* NOTE: publishes the slot handed out by beamform_work_queue_push() by advancing
+ * the write index (held in the low bits of q->queue) by one */
+DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit)
+{
+ atomic_add(&q->queue, 1);
+}
+
static void
export_frame(BeamformerCtx *ctx, iptr handle, BeamformFrame *frame)
{
@@ -294,7 +268,7 @@ export_frame(BeamformerCtx *ctx, iptr handle, BeamformFrame *frame)
glGetTextureImage(frame->texture, 0, GL_RG, GL_FLOAT, out_size, ctx->export_buffer.beg);
s8 raw = {.len = out_size, .data = ctx->export_buffer.beg};
if (!ctx->platform.write_file(handle, raw))
- TraceLog(LOG_WARNING, "failed to export frame\n");
+ ctx->platform.write_file(ctx->platform.error_file_handle, s8("failed to export frame\n"));
ctx->platform.close(handle);
}
@@ -316,57 +290,58 @@ do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, f32
}
}
-static void
-do_beamform_shader(ComputeShaderCtx *cs, BeamformerParameters *bp, BeamformFrame *frame,
- u32 rf_ssbo, iv3 dispatch_dim, iv3 compute_dim_offset, i32 compute_pass)
-{
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, rf_ssbo);
- glUniform3iv(cs->volume_export_dim_offset_id, 1, compute_dim_offset.E);
- glUniform1i(cs->volume_export_pass_id, compute_pass);
-
- glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
- glDispatchCompute(ORONE(dispatch_dim.x / 32),
- ORONE(dispatch_dim.y),
- ORONE(dispatch_dim.z / 32));
-}
+/* NOTE: state for splitting one large compute job into several smaller dispatches */
+struct compute_cursor {
+ /* index of the next tile along each axis; advanced by step_compute_cursor() */
+ iv3 cursor;
+ /* work group counts used for each individual dispatch */
+ iv3 dispatch;
+ /* tile count per axis at which the matching cursor component wraps */
+ iv3 target;
+};
-static b32
-do_partial_compute_step(BeamformerCtx *ctx, BeamformFrame *frame)
+/* NOTE: chooses per-dispatch work group counts for a volume of size dim so that a
+ * single dispatch is limited by max_points, and derives how many tiles are needed
+ * along each axis. Every dispatch count is clamped to at least 1: the counts are
+ * used as divisors below, so a small max_points must not produce a zero. */
+static struct compute_cursor
+start_compute_cursor(uv3 dim, u32 max_points)
{
- ComputeShaderCtx *cs = &ctx->csctx;
- PartialComputeCtx *pc = &ctx->partial_compute_ctx;
+ struct compute_cursor result = {0};
+ u32 invocations_per_dispatch = DAS_LOCAL_SIZE_X * DAS_LOCAL_SIZE_Y * DAS_LOCAL_SIZE_Z;
- b32 done = 0;
+ result.dispatch.y = MIN(max_points / invocations_per_dispatch, MAX(dim.y / DAS_LOCAL_SIZE_Y, 1));
+ result.dispatch.y = MAX(result.dispatch.y, 1);
- /* NOTE: we start this elsewhere on the first dispatch so that we can include
- * times such as decoding/demodulation/etc. */
- if (!pc->timer_active) {
- glQueryCounter(pc->timer_ids[0], GL_TIMESTAMP);
- pc->timer_active = 1;
- }
-
- glBeginQuery(GL_TIME_ELAPSED, cs->timer_ids[cs->timer_index][pc->shader]);
- cs->timer_active[cs->timer_index][pc->shader] = 1;
+ u32 remaining = max_points / result.dispatch.y;
+ result.dispatch.x = MIN(remaining / invocations_per_dispatch, MAX(dim.x / DAS_LOCAL_SIZE_X, 1));
+ result.dispatch.x = MAX(result.dispatch.x, 1);
+ result.dispatch.z = MIN(remaining / (invocations_per_dispatch * result.dispatch.x),
+ MAX(dim.z / DAS_LOCAL_SIZE_Z, 1));
+ result.dispatch.z = MAX(result.dispatch.z, 1);
- glUseProgram(cs->programs[pc->shader]);
+ result.target.x = MAX(dim.x / result.dispatch.x / DAS_LOCAL_SIZE_X, 1);
+ result.target.y = MAX(dim.y / result.dispatch.y / DAS_LOCAL_SIZE_Y, 1);
+ result.target.z = MAX(dim.z / result.dispatch.z / DAS_LOCAL_SIZE_Z, 1);
- /* NOTE: We must tile this otherwise GL will kill us for taking too long */
- /* TODO: this could be based on multiple dimensions */
- i32 dispatch_count = frame->dim.z / 32;
- iv3 dim_offset = {.z = !!dispatch_count * 32 * pc->dispatch_index++};
- iv3 dispatch_dim = {.x = frame->dim.x, .y = frame->dim.y, .z = 1};
- do_beamform_shader(cs, &ctx->params->raw, frame, pc->rf_data_ssbo, dispatch_dim, dim_offset, 1);
+ return result;
+}
- if (pc->dispatch_index >= dispatch_count) {
- pc->dispatch_index = 0;
- done = 1;
+/* NOTE: returns the offset of the current tile (tile index scaled by the dispatch
+ * size and local size on each axis) and then advances the cursor to the next tile.
+ * x advances fastest; when x reaches target.x it wraps and y advances; when y
+ * reaches target.y it wraps and z advances. z is never wrapped here. */
+static iv3
+step_compute_cursor(struct compute_cursor *cursor)
+{
+ iv3 result = cursor->cursor;
+ result.x *= cursor->dispatch.x * DAS_LOCAL_SIZE_X;
+ result.y *= cursor->dispatch.y * DAS_LOCAL_SIZE_Y;
+ result.z *= cursor->dispatch.z * DAS_LOCAL_SIZE_Z;
+
+ cursor->cursor.x += 1;
+ if (cursor->cursor.x >= cursor->target.x) {
+ cursor->cursor.x = 0;
+ cursor->cursor.y += 1;
+ if (cursor->cursor.y >= cursor->target.y) {
+ cursor->cursor.y = 0;
+ cursor->cursor.z += 1;
+ }
}
- glQueryCounter(pc->timer_ids[1], GL_TIMESTAMP);
-
- glEndQuery(GL_TIME_ELAPSED);
+ return result;
+}
- return done;
+/* NOTE: meant to be tested right after step_compute_cursor(), i.e. when the cursor
+ * is already one tile ahead of the offset that was just returned. The returned
+ * offset is past the last tile once the cursor has moved beyond the first tile of
+ * layer target.z. Testing only cursor.z > target.z would let every remaining tile
+ * of layer target.z be dispatched even though it lies outside the volume. */
+static b32
+compute_cursor_finished(struct compute_cursor *cursor)
+{
+ b32 beyond_last_layer = cursor->cursor.z > cursor->target.z;
+ b32 inside_extra_layer = cursor->cursor.z == cursor->target.z &&
+ (cursor->cursor.x > 0 || cursor->cursor.y > 0);
+ b32 result = beyond_last_layer || inside_extra_layer;
+ return result;
}
static void
@@ -377,9 +352,6 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformFrame *frame, u32 raw
uv2 rf_raw_dim = ctx->params->raw.rf_raw_dim;
size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
- glBeginQuery(GL_TIME_ELAPSED, csctx->timer_ids[csctx->timer_index][shader]);
- csctx->timer_active[csctx->timer_index][shader] = 1;
-
glUseProgram(csctx->programs[shader]);
u32 output_ssbo_idx = !csctx->last_output_ssbo_index;
@@ -430,168 +402,277 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformFrame *frame, u32 raw
}
} break;
case CS_DAS: {
- u32 rf_ssbo = csctx->rf_data_ssbos[input_ssbo_idx];
- iv3 dispatch_dim = {.x = frame->dim.x, .y = frame->dim.y, .z = frame->dim.z};
- do_beamform_shader(csctx, &ctx->params->raw, frame, rf_ssbo, dispatch_dim, (iv3){0}, 0);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
+ glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
+
+ #if 1
+ /* TODO(rnp): compute max_points_per_dispatch based on something like a
+ * transmit_count * channel_count product */
+ u32 max_points_per_dispatch = KB(64);
+ struct compute_cursor cursor = start_compute_cursor(frame->dim, max_points_per_dispatch);
+ for (iv3 offset = step_compute_cursor(&cursor);
+ !compute_cursor_finished(&cursor);
+ offset = step_compute_cursor(&cursor))
+ {
+ /* IMPORTANT(rnp): prevents OS from coalescing and killing our shader */
+ glFinish();
+ glUniform3iv(csctx->voxel_offset_id, 1, offset.E);
+ glDispatchCompute(cursor.dispatch.x, cursor.dispatch.y, cursor.dispatch.z);
+ }
+ #else
+ /* NOTE(rnp): use this for testing tiling code. The performance of the above path
+ * should be the same as this path if everything is working correctly */
+ iv3 compute_dim_offset = {0};
+ glUniform3iv(csctx->voxel_offset_id, 1, compute_dim_offset.E);
+ glDispatchCompute(ORONE(frame->dim.x / 32),
+ ORONE(frame->dim.y),
+ ORONE(frame->dim.z / 32));
+ #endif
} break;
case CS_SUM: {
u32 frame_count = 0;
u32 *in_textures = alloc(&arena, u32, MAX_BEAMFORMED_SAVED_FRAMES);
- BeamformFrameIterator bfi = beamform_frame_iterator(ctx);
- for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi))
+ BeamformFrameIterator bfi = beamform_frame_iterator(ctx, ctx->display_frame_index,
+ ctx->params->raw.output_points.w);
+ for (BeamformFrame *frame = frame_next_backwards(&bfi);
+ frame;
+ frame = frame_next_backwards(&bfi))
+ {
in_textures[frame_count++] = frame->texture;
+ }
do_sum_shader(csctx, in_textures, frame_count, 1 / (f32)frame_count,
ctx->averaged_frame.texture, ctx->averaged_frame.dim);
} break;
default: ASSERT(0);
}
-
- glEndQuery(GL_TIME_ELAPSED);
}
-static BeamformFrame *
-start_beamform_compute_work(BeamformWork *work, ComputeShaderCtx *cs, BeamformerParametersFull *bpf)
+/* NOTE: compiles shader source of the given GL shader type and returns the shader
+ * object id. On failure the info log, prefixed with name, is written to the
+ * platform error file, the shader object is deleted and 0 is returned. The arena
+ * is passed by value so the scratch space used for the log is released on return. */
+static u32
+compile_shader(Platform *platform, Arena a, u32 type, s8 shader, s8 name)
{
- BeamformFrame *result = work->compute_ctx.frame;
- if (bpf->upload) {
- glNamedBufferSubData(cs->shared_ubo, 0, sizeof(bpf->raw), &bpf->raw);
- bpf->upload = 0;
+ u32 sid = glCreateShader(type);
+ glShaderSource(sid, 1, (const char **)&shader.data, (int *)&shader.len);
+ glCompileShader(sid);
+
+ i32 res = 0;
+ glGetShaderiv(sid, GL_COMPILE_STATUS, &res);
+
+ if (res == GL_FALSE) {
+ Stream buf = arena_stream(&a);
+ stream_append_s8(&buf, name);
+ stream_append_s8(&buf, s8(": failed to compile\n"));
+
+ i32 len = 0, out_len = 0;
+ glGetShaderiv(sid, GL_INFO_LOG_LENGTH, &len);
+ /* NOTE: GL writes up to len bytes; never let it run past the space that is
+ * left in the stream (same bound link_program() uses for its log) */
+ i32 space = buf.cap - buf.widx;
+ if (len > space)
+ len = space;
+ glGetShaderInfoLog(sid, len, &out_len, (char *)(buf.data + buf.widx));
+ buf.widx += out_len;
+ glDeleteShader(sid);
+ platform->write_file(platform->error_file_handle, stream_to_s8(&buf));
+
+ sid = 0;
}
- result->min_coordinate = bpf->raw.output_min_coordinate;
- result->max_coordinate = bpf->raw.output_max_coordinate;
+ return sid;
+}
+/* NOTE: creates a program, attaches shader_id and links it. Returns the program id,
+ * or 0 after writing the link log to the platform error file and deleting the
+ * program when linking fails. The shader object itself is left for the caller. */
+static u32
+link_program(Platform *platform, Arena a, u32 shader_id)
+{
+ i32 success = 0;
+ u32 result = glCreateProgram();
+ glAttachShader(result, shader_id);
+ glLinkProgram(result);
+ glGetProgramiv(result, GL_LINK_STATUS, &success);
+ if (success == GL_FALSE) {
+ i32 len = 0;
+ Stream buf = arena_stream(&a);
+ stream_append_s8(&buf, s8("shader link error: "));
+ glGetProgramInfoLog(result, buf.cap - buf.widx, &len, (c8 *)(buf.data + buf.widx));
+ /* NOTE: the log was written after the prefix, so advance the write index
+ * rather than overwrite it; assigning len would drop the prefix and cut the log */
+ buf.widx += len;
+ stream_append_byte(&buf, '\n');
+ platform->write_file(platform->error_file_handle, stream_to_s8(&buf));
+ glDeleteProgram(result);
+ result = 0;
+ }
return result;
}
+/* NOTE: reads the compute shader source at path, optionally prefixed with
+ * COMPUTE_SHADER_HEADER, compiles and links it, and on success replaces
+ * cs->programs[csr->shader] with the new program. A "loaded: " or
+ * "failed to load: " line is written to the platform error file. tmp is passed by
+ * value, so everything pushed onto it here is scratch. */
static void
-do_beamform_work(BeamformerCtx *ctx, Arena *a)
+reload_compute_shader(BeamformerCtx *ctx, s8 path, ComputeShaderReloadContext *csr, Arena tmp)
+{
+ ComputeShaderCtx *cs = &ctx->csctx;
+
+ /* NOTE: arena works as stack (since everything here is 1 byte aligned) */
+ s8 header_in_arena = {.data = tmp.beg};
+ if (csr->needs_header)
+ header_in_arena = push_s8(&tmp, s8(COMPUTE_SHADER_HEADER));
+
+ /* NOTE: back the file text up over the header so header + file form one string.
+ * This only lines up when the file was placed directly after the header in tmp;
+ * presumably a failed read returns something else - TODO confirm against
+ * read_whole_file */
+ s8 shader_text = ctx->platform.read_whole_file(&tmp, (c8 *)path.data);
+ shader_text.data -= header_in_arena.len;
+ shader_text.len += header_in_arena.len;
+
+ if (shader_text.data == header_in_arena.data) {
+ u32 shader_id = compile_shader(&ctx->platform, tmp, GL_COMPUTE_SHADER, shader_text, path);
+ if (shader_id) {
+ u32 new_program = link_program(&ctx->platform, tmp, shader_id);
+ if (new_program) {
+ Stream buf = arena_stream(&tmp);
+ stream_append_s8(&buf, s8("loaded: "));
+ stream_append_s8(&buf, path);
+ stream_append_byte(&buf, '\n');
+ ctx->platform.write_file(ctx->platform.error_file_handle,
+ stream_to_s8(&buf));
+ /* NOTE: the old program is only dropped once a replacement linked */
+ glDeleteProgram(cs->programs[csr->shader]);
+ cs->programs[csr->shader] = new_program;
+ glUseProgram(cs->programs[csr->shader]);
+ glBindBufferBase(GL_UNIFORM_BUFFER, 0, cs->shared_ubo);
+ LABEL_GL_OBJECT(GL_PROGRAM, cs->programs[csr->shader], csr->label);
+ }
+ }
+
+ /* NOTE: reached whether or not compiling/linking succeeded; shader_id is 0
+ * when compile_shader() failed */
+ glDeleteShader(shader_id);
+ } else {
+ Stream buf = arena_stream(&tmp);
+ stream_append_s8(&buf, s8("failed to load: "));
+ stream_append_s8(&buf, path);
+ stream_append_byte(&buf, '\n');
+ ctx->platform.write_file(ctx->platform.error_file_handle, stream_to_s8(&buf));
+ /* TODO(rnp): return an error and don't let the work item calling this function
+ * call pop off the queue; store a retry count and only fail after multiple tries */
+ }
+}
+
+/* NOTE: drains the beamform work queue. Each item is peeked with
+ * beamform_work_queue_pop(), handled according to its type and only then released
+ * with beamform_work_queue_pop_commit():
+ * BW_RELOAD_SHADER: recompile a compute shader and refresh the uniform locations
+ * BW_LOAD_RF_DATA: wait for the raw data fence, read RF data from the work item's
+ * file handle and mark the context ready for more RF data
+ * BW_COMPUTE: run every configured compute stage on the target frame, block
+ * until the GPU is done and record per stage timings
+ * BW_SAVE_FRAME: export an already computed frame */
+DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute)
{
- BeamformWorkQueue *q = &ctx->beamform_work_queue;
+ BeamformerCtx *ctx = (BeamformerCtx *)user_context;
+ BeamformWorkQueue *q = ctx->beamform_work_queue;
BeamformWork *work = beamform_work_queue_pop(q);
ComputeShaderCtx *cs = &ctx->csctx;
+ BeamformerParameters *bp = &ctx->params->raw;
+
+ if (ctx->csctx.programs[CS_DAS])
+ glProgramUniform1f(ctx->csctx.programs[CS_DAS], ctx->csctx.cycle_t_id, cycle_t);
+
while (work) {
switch (work->type) {
- case BW_PARTIAL_COMPUTE: {
- BeamformFrame *frame = work->compute_ctx.frame;
-
- if (work->compute_ctx.first_pass) {
- start_beamform_compute_work(work, cs, ctx->params);
-
- PartialComputeCtx *pc = &ctx->partial_compute_ctx;
- pc->runtime = 0;
- pc->timer_active = 1;
- glQueryCounter(pc->timer_ids[0], GL_TIMESTAMP);
- glDeleteBuffers(1, &pc->rf_data_ssbo);
- glCreateBuffers(1, &pc->rf_data_ssbo);
- glNamedBufferStorage(pc->rf_data_ssbo, decoded_data_size(cs), 0, 0);
- LABEL_GL_OBJECT(GL_BUFFER, pc->rf_data_ssbo, s8("Volume_RF_SSBO"));
-
- /* TODO: maybe we should have some concept of compute shader
- * groups, then we could define a group that does the decoding
- * and filtering and apply that group directly here. For now
- * we will do this dumb thing */
- u32 stage_count = ctx->params->compute_stages_count;
- enum compute_shaders *stages = ctx->params->compute_stages;
- for (u32 i = 0; i < stage_count; i++) {
- if (stages[i] == CS_DAS) {
- ctx->partial_compute_ctx.shader = stages[i];
- break;
+ case BW_RELOAD_SHADER: {
+ ComputeShaderReloadContext *csr = work->reload_shader_ctx;
+ reload_compute_shader(ctx, csr->path, csr, arena);
+
+ /* TODO(rnp): remove this */
+ #define X(idx, name) cs->name##_id = glGetUniformLocation(cs->programs[idx], "u_" #name);
+ CS_UNIFORMS
+ #undef X
+ } break;
+ case BW_LOAD_RF_DATA: {
+ u32 raw_index = cs->raw_data_index;
+ if (cs->raw_data_fences[raw_index]) {
+ GLsync fence = cs->raw_data_fences[raw_index];
+ /* NOTE: zero timeout poll first; only report a stall if the GPU is
+ * still using this slot */
+ i32 status = glClientWaitSync(fence, 0, 0);
+ if (status != GL_ALREADY_SIGNALED) {
+ ctx->platform.write_file(ctx->platform.error_file_handle,
+ s8("stall while loading RF data\n"));
+ u64 timeout = ctx->gl.max_server_wait_time;
+ for (;;) {
+ status = glClientWaitSync(fence, 0, timeout);
+ /* NOTE: GL_WAIT_FAILED will not resolve by retrying; leave
+ * the loop instead of spinning forever */
+ if (status == GL_CONDITION_SATISFIED ||
+ status == GL_ALREADY_SIGNALED ||
+ status == GL_WAIT_FAILED)
+ {
+ break;
+ }
}
- do_compute_shader(ctx, *a, frame,
- work->compute_ctx.raw_data_ssbo_index,
- stages[i]);
}
- u32 output_ssbo = pc->rf_data_ssbo;
- u32 input_ssbo = cs->rf_data_ssbos[cs->last_output_ssbo_index];
- size rf_size = decoded_data_size(cs);
- glCopyNamedBufferSubData(input_ssbo, output_ssbo, 0, 0, rf_size);
+ glDeleteSync(cs->raw_data_fences[raw_index]);
+ cs->raw_data_fences[raw_index] = 0;
}
- b32 done = do_partial_compute_step(ctx, frame);
- if (!done) {
- BeamformWork *new;
- /* NOTE: this push must not fail */
- new = beamform_work_queue_push(ctx, a, BW_PARTIAL_COMPUTE);
- new->compute_ctx.first_pass = 0;
- new->compute_ctx.frame = frame;
- new->compute_ctx.export_handle = work->compute_ctx.export_handle;
- } else if (work->compute_ctx.export_handle != INVALID_FILE) {
- export_frame(ctx, work->compute_ctx.export_handle, frame);
- work->compute_ctx.export_handle = INVALID_FILE;
- /* NOTE: do not waste a bunch of GPU space holding onto the volume
- * texture if it was just for export */
- glDeleteTextures(1, &frame->texture);
- mem_clear(frame, 0, sizeof(*frame));
+ if (!uv2_equal(cs->rf_raw_dim, bp->rf_raw_dim) ||
+ !uv4_equal(cs->dec_data_dim, bp->dec_data_dim))
+ {
+ alloc_shader_storage(ctx, arena);
}
+
+ uv2 rf_raw_dim = cs->rf_raw_dim;
+ size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
+ void *rf_data_buf = cs->raw_data_arena.beg + raw_index * rf_raw_size;
+
+ size rlen = ctx->platform.read_file(work->file_handle, rf_data_buf, rf_raw_size);
+ if (rlen != rf_raw_size) {
+ stream_append_s8(&ctx->error_stream, s8("Partial Read Occurred: "));
+ stream_append_i64(&ctx->error_stream, rlen);
+ stream_append_byte(&ctx->error_stream, '/');
+ stream_append_i64(&ctx->error_stream, rf_raw_size);
+ stream_append_byte(&ctx->error_stream, '\n');
+ ctx->platform.write_file(ctx->platform.error_file_handle,
+ stream_to_s8(&ctx->error_stream));
+ ctx->error_stream.widx = 0;
+ } else {
+ /* NOTE: only the NVIDIA path uploads explicitly; the other vendors do
+ * nothing here */
+ switch (ctx->gl.vendor_id) {
+ case GL_VENDOR_AMD:
+ case GL_VENDOR_ARM:
+ case GL_VENDOR_INTEL:
+ break;
+ case GL_VENDOR_NVIDIA:
+ glNamedBufferSubData(cs->raw_data_ssbo, raw_index * rlen,
+ rlen, rf_data_buf);
+ }
+ }
+ ctx->ready_for_rf = 1;
} break;
- case BW_FULL_COMPUTE:
- case BW_RECOMPUTE: {
- BeamformFrame *frame = start_beamform_compute_work(work, cs, ctx->params);
+ case BW_COMPUTE: {
+ atomic_store(&cs->processing_compute, 1);
+ BeamformFrame *frame = work->frame;
+ if (ctx->params->upload) {
+ glNamedBufferSubData(cs->shared_ubo, 0, sizeof(ctx->params->raw),
+ &ctx->params->raw);
+ ctx->params->upload = 0;
+ }
+
+ uv3 try_dim = ctx->params->raw.output_points.xyz;
+ if (!uv3_equal(try_dim, frame->dim)) {
+ size frame_index = frame - ctx->beamform_frames;
+ alloc_beamform_frame(&ctx->gl, frame, try_dim, frame_index,
+ s8("Beamformed_Data"));
+ }
+
+ frame->in_flight = 1;
+ frame->min_coordinate = ctx->params->raw.output_min_coordinate;
+ frame->max_coordinate = ctx->params->raw.output_max_coordinate;
u32 stage_count = ctx->params->compute_stages_count;
enum compute_shaders *stages = ctx->params->compute_stages;
- for (u32 i = 0; i < stage_count; i++)
- do_compute_shader(ctx, *a, frame, work->compute_ctx.raw_data_ssbo_index,
- stages[i]);
-
- if (work->compute_ctx.export_handle != INVALID_FILE) {
- export_frame(ctx, work->compute_ctx.export_handle, frame);
- work->compute_ctx.export_handle = INVALID_FILE;
+ for (u32 i = 0; i < stage_count; i++) {
+ frame->timer_active[stages[i]] = 1;
+ glBeginQuery(GL_TIME_ELAPSED, frame->timer_ids[stages[i]]);
+ do_compute_shader(ctx, arena, frame, cs->raw_data_index, stages[i]);
+ glEndQuery(GL_TIME_ELAPSED);
+ }
+ /* NOTE(rnp): block until work completes so that we can record timings */
+ glFinish();
+
+ for (u32 i = 0; i < ARRAY_COUNT(frame->timer_ids); i++) {
+ u64 ns = 0;
+ if (frame->timer_active[i]) {
+ glGetQueryObjectui64v(frame->timer_ids[i], GL_QUERY_RESULT, &ns);
+ frame->timer_active[i] = 0;
+ }
+ frame->compute_times[i] = (f32)ns / 1e9;
}
- ctx->fsctx.gen_mipmaps = 1;
+ frame->ready_to_present = 1;
+ /* NOTE: clear the flag with the same atomic operation that set it */
+ atomic_store(&cs->processing_compute, 0);
+ } break;
+ case BW_SAVE_FRAME: {
+ BeamformFrame *frame = work->output_frame_ctx.frame;
+ ASSERT(frame->ready_to_present);
+ export_frame(ctx, work->output_frame_ctx.file_handle, frame);
} break;
}
-
- work->next = q->next_free;
- q->next_free = work;
+ beamform_work_queue_pop_commit(q);
work = beamform_work_queue_pop(q);
}
-
- if (q->did_compute_this_frame) {
- u32 tidx = ctx->csctx.timer_index;
- glDeleteSync(ctx->csctx.timer_fences[tidx]);
- ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences);
- }
-}
-
-static void
-check_compute_timers(ComputeShaderCtx *cs, PartialComputeCtx *pc, BeamformerParametersFull *bp)
-{
- /* NOTE: volume generation running timer */
- if (pc->timer_active) {
- u64 start_ns = 0, end_ns = 0;
- glGetQueryObjectui64v(pc->timer_ids[0], GL_QUERY_RESULT, &start_ns);
- glGetQueryObjectui64v(pc->timer_ids[1], GL_QUERY_RESULT, &end_ns);
- u64 elapsed_ns = end_ns - start_ns;
- pc->runtime += (f32)elapsed_ns * 1e-9;
- pc->timer_active = 0;
- }
-
- /* NOTE: main timers for display portion of the program */
- u32 last_idx = (cs->timer_index - 1) % ARRAY_COUNT(cs->timer_fences);
- if (!cs->timer_fences[last_idx])
- return;
-
- i32 status = glClientWaitSync(cs->timer_fences[last_idx], 0, 0);
- if (status == GL_TIMEOUT_EXPIRED || status == GL_WAIT_FAILED)
- return;
- glDeleteSync(cs->timer_fences[last_idx]);
- cs->timer_fences[last_idx] = NULL;
-
- for (u32 i = 0; i < bp->compute_stages_count; i++) {
- u64 ns = 0;
- i32 idx = bp->compute_stages[i];
- if (cs->timer_active[last_idx][idx]) {
- glGetQueryObjectui64v(cs->timer_ids[last_idx][idx], GL_QUERY_RESULT, &ns);
- cs->timer_active[last_idx][idx] = 0;
- }
- cs->last_frame_time[idx] = (f32)ns / 1e9;
- }
}
#include "ui.c"
@@ -602,7 +683,6 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
cycle_t += dt_for_frame;
if (cycle_t > 1) cycle_t -= 1;
- glProgramUniform1f(ctx->csctx.programs[CS_DAS], ctx->csctx.cycle_t_id, cycle_t);
if (IsWindowResized()) {
ctx->window_size.h = GetScreenHeight();
@@ -613,83 +693,83 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
ui_init(ctx, ctx->ui_backing_store);
}
- if (ctx->flags & START_COMPUTE) {
- if (ui_can_start_compute(ctx))
- ui_start_compute(ctx);
- ctx->flags &= ~START_COMPUTE;
+ if (ctx->start_compute && !input->pipe_data_available) {
+ if (ctx->beamform_frames[ctx->display_frame_index].ready_to_present) {
+ BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
+ if (work) {
+ /* TODO(rnp): cleanup all the duplicates of this */
+ work->type = BW_COMPUTE;
+ work->frame = ctx->beamform_frames + ctx->next_render_frame_index++;
+ work->frame->ready_to_present = 0;
+ if (ctx->next_render_frame_index >= ARRAY_COUNT(ctx->beamform_frames))
+ ctx->next_render_frame_index = 0;
+ beamform_work_queue_push_commit(ctx->beamform_work_queue);
+ }
+ }
+ ctx->platform.wake_thread(ctx->platform.compute_worker.sync_handle);
+ ctx->start_compute = 0;
}
- /* NOTE: Store the compute time for the last frame. */
- check_compute_timers(&ctx->csctx, &ctx->partial_compute_ctx, ctx->params);
-
BeamformerParameters *bp = &ctx->params->raw;
- /* NOTE: Check for and Load RF Data into GPU */
- if (input->pipe_data_available) {
- BeamformWork *work = beamform_work_queue_push(ctx, arena, BW_FULL_COMPUTE);
- /* NOTE: we can only read in the new data if we get back a work item.
- * otherwise we have too many frames in flight and should wait until the
- * next frame to try again */
+ if (ctx->ready_for_rf && input->pipe_data_available) {
+ BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
if (work) {
- ComputeShaderCtx *cs = &ctx->csctx;
-
- if (ctx->params->export_next_frame) {
- /* TODO: we don't really want the beamformer opening/closing files */
- iptr f = ctx->platform.open_for_write(ctx->params->export_pipe_name);
- work->compute_ctx.export_handle = f;
- ctx->params->export_next_frame = 0;
- } else {
- work->compute_ctx.export_handle = INVALID_FILE;
- }
-
- b32 output_3d = bp->output_points.x > 1 && bp->output_points.y > 1 &&
- bp->output_points.z > 1;
-
- if (output_3d) {
- work->type = BW_PARTIAL_COMPUTE;
- BeamformFrame *frame = &ctx->partial_compute_ctx.frame;
- uv3 out_dim = ctx->params->raw.output_points.xyz;
- alloc_beamform_frame(&ctx->gl, frame, out_dim, 0, s8("Beamformed_Volume"));
- work->compute_ctx.frame = frame;
+ ctx->start_compute = 1;
+ ctx->ready_for_rf = 0;
+
+ work->type = BW_LOAD_RF_DATA;
+ work->file_handle = input->pipe_handle;
+ beamform_work_queue_push_commit(ctx->beamform_work_queue);
+
+ BeamformWork *compute = beamform_work_queue_push(ctx->beamform_work_queue);
+ if (compute) {
+ compute->type = BW_COMPUTE;
+ compute->frame = ctx->beamform_frames + ctx->next_render_frame_index++;
+ compute->frame->ready_to_present = 0;
+ if (ctx->next_render_frame_index >= ARRAY_COUNT(ctx->beamform_frames))
+ ctx->next_render_frame_index = 0;
+ beamform_work_queue_push_commit(ctx->beamform_work_queue);
+
+ if (ctx->params->export_next_frame) {
+ BeamformWork *export = beamform_work_queue_push(ctx->beamform_work_queue);
+ if (export) {
+ /* TODO: we don't really want the beamformer opening/closing files */
+ iptr f = ctx->platform.open_for_write(ctx->params->export_pipe_name);
+ export->type = BW_SAVE_FRAME;
+ export->output_frame_ctx.file_handle = f;
+ export->output_frame_ctx.frame = compute->frame;
+ beamform_work_queue_push_commit(ctx->beamform_work_queue);
+ }
+ ctx->params->export_next_frame = 0;
+ }
}
- if (!uv2_equal(cs->rf_raw_dim, bp->rf_raw_dim) ||
- !uv4_equal(cs->dec_data_dim, bp->dec_data_dim))
- {
- alloc_shader_storage(ctx, *arena);
+ if (ctx->params->upload) {
+ /* TODO(rnp): clean this up */
+ ctx->ui->read_params = 1;
}
- u32 raw_index = work->compute_ctx.raw_data_ssbo_index;
- uv2 rf_raw_dim = cs->rf_raw_dim;
- size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
- void *rf_data_buf = cs->raw_data_arena.beg + raw_index * rf_raw_size;
-
alloc_output_image(ctx, bp->output_points.xyz);
+ }
+ }
- size rlen = ctx->platform.read_pipe(input->pipe_handle, rf_data_buf, rf_raw_size);
- if (rlen != rf_raw_size) {
- stream_append_s8(&ctx->error_stream, s8("Partial Read Occurred: "));
- stream_append_i64(&ctx->error_stream, rlen);
- stream_append_byte(&ctx->error_stream, '/');
- stream_append_i64(&ctx->error_stream, rf_raw_size);
- stream_append_s8(&ctx->error_stream, s8("\n\0"));
- TraceLog(LOG_WARNING, (c8 *)stream_to_s8(&ctx->error_stream).data);
- ctx->error_stream.widx = 0;
- } else {
- switch (ctx->gl.vendor_id) {
- case GL_VENDOR_AMD:
- case GL_VENDOR_ARM:
- case GL_VENDOR_INTEL:
- break;
- case GL_VENDOR_NVIDIA:
- glNamedBufferSubData(cs->raw_data_ssbo, raw_index * rlen,
- rlen, rf_data_buf);
- }
- }
+ BeamformFrameIterator bfi = beamform_frame_iterator(ctx, ctx->display_frame_index,
+ ctx->next_render_frame_index);
+ for (BeamformFrame *frame = frame_next_forwards(&bfi);
+ frame;
+ frame = frame_next_forwards(&bfi))
+ {
+ if (frame->in_flight && frame->ready_to_present) {
+ frame->in_flight = 0;
+ ctx->display_frame_index = (bfi.offset + bfi.cursor - 1) % bfi.capacity;
+ ctx->fsctx.gen_mipmaps = 1;
}
}
- ctx->beamform_work_queue.did_compute_this_frame = 0;
- do_beamform_work(ctx, arena);
+ if (ctx->start_compute) {
+ ctx->start_compute = 0;
+ ctx->platform.wake_thread(ctx->platform.compute_worker.sync_handle);
+ }
/* NOTE: draw output image texture using render fragment shader */
BeamformFrame *frame_to_draw = 0;
@@ -703,8 +783,8 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
frame_to_draw = &ctx->averaged_frame;
out_texture = ctx->averaged_frame.texture;
} else {
- frame_to_draw = ctx->beamform_frames + ctx->displayed_frame_index;
- out_texture = frame_to_draw->texture;
+ frame_to_draw = ctx->beamform_frames + ctx->display_frame_index;
+ out_texture = frame_to_draw->ready_to_present ? frame_to_draw->texture : 0;
}
glBindTextureUnit(0, out_texture);
glUniform1f(fs->db_cutoff_id, fs->db);
@@ -725,5 +805,5 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
draw_ui(ctx, input, frame_to_draw);
if (WindowShouldClose())
- ctx->flags |= SHOULD_EXIT;
+ ctx->should_exit = 1;
}
diff --git a/beamformer.h b/beamformer.h
@@ -5,7 +5,7 @@
#include <glad.h>
#define GRAPHICS_API_OPENGL_43
-#include <raylib.h>
+#include <raylib_extended.h>
#include <rlgl.h>
#include "util.h"
@@ -33,11 +33,6 @@
/* TODO: multiple views */
#define MAX_DISPLAYS 1
-enum program_flags {
- SHOULD_EXIT = 1 << 0,
- START_COMPUTE = 1 << 1,
-};
-
enum gl_vendor_ids {
GL_VENDOR_AMD,
GL_VENDOR_ARM,
@@ -105,24 +100,13 @@ typedef struct {
} ScaleBar;
typedef struct {
- TempArena frame_temporary_arena;
- Arena arena_for_frame;
+ b32 executable_reloaded;
+ b32 pipe_data_available;
+ iptr pipe_handle;
- Font font;
- Font small_font;
- f32 font_height;
- f32 small_font_height;
-
- InteractionState interaction;
- InputState text_input_state;
-
- ScaleBar scale_bars[MAX_DISPLAYS][2];
- v2_sll *scale_bar_savepoint_freelist;
-
- v2 ruler_start_p;
- v2 ruler_stop_p;
- u32 ruler_state;
-} BeamformerUI;
+ v2 mouse;
+ v2 last_mouse;
+} BeamformerInput;
#define MAX_FRAMES_IN_FLIGHT 3
@@ -157,6 +141,7 @@ typedef struct {
} CudaLib;
#include "beamformer_parameters.h"
+
typedef struct {
BeamformerParameters raw;
enum compute_shaders compute_stages[16];
@@ -166,22 +151,42 @@ typedef struct {
c8 export_pipe_name[1024];
} BeamformerParametersFull;
-#define CS_UNIFORMS \
- X(CS_DAS, volume_export_dim_offset) \
- X(CS_DAS, volume_export_pass) \
- X(CS_DAS, cycle_t) \
- X(CS_MIN_MAX, mips_level) \
+typedef struct {
+ TempArena frame_temporary_arena;
+ Arena arena_for_frame;
+
+ Font font;
+ Font small_font;
+ f32 font_height;
+ f32 small_font_height;
+
+ InteractionState interaction;
+ InputState text_input_state;
+
+ ScaleBar scale_bars[MAX_DISPLAYS][2];
+ v2_sll *scale_bar_savepoint_freelist;
+
+ v2 ruler_start_p;
+ v2 ruler_stop_p;
+ u32 ruler_state;
+
+ BeamformerUIParameters params;
+ b32 flush_params;
+ /* TODO(rnp): this is nasty and should be removed */
+ b32 read_params;
+
+ iptr last_displayed_frame;
+} BeamformerUI;
+
+#define CS_UNIFORMS \
+ X(CS_DAS, voxel_offset) \
+ X(CS_DAS, cycle_t) \
+ X(CS_MIN_MAX, mips_level) \
X(CS_SUM, sum_prescale)
typedef struct {
u32 programs[CS_LAST];
- u32 timer_index;
- u32 timer_ids[MAX_FRAMES_IN_FLIGHT][CS_LAST];
- b32 timer_active[MAX_FRAMES_IN_FLIGHT][CS_LAST];
- GLsync timer_fences[MAX_FRAMES_IN_FLIGHT];
- f32 last_frame_time[CS_LAST];
-
/* NOTE: the raw_data_ssbo is allocated at 3x the required size to allow for tiled
* transfers when the GPU is running behind the CPU. It is not mapped on NVIDIA because
* their drivers _will_ store the buffer in the system memory. This doesn't happen
@@ -201,6 +206,8 @@ typedef struct {
u32 shared_ubo;
+ b32 processing_compute;
+
uv4 dec_data_dim;
uv2 rf_raw_dim;
@@ -230,17 +237,13 @@ typedef struct {
v4 max_coordinate;
u32 mips;
-} BeamformFrame;
+ b32 in_flight;
+ b32 ready_to_present;
-typedef struct {
- BeamformFrame frame;
- u32 timer_ids[2];
- f32 runtime;
- u32 rf_data_ssbo;
- u32 shader;
- u32 dispatch_index;
- b32 timer_active;
-} PartialComputeCtx;
+ u32 timer_ids[CS_LAST];
+ f32 compute_times[CS_LAST];
+ b32 timer_active[CS_LAST];
+} BeamformFrame;
typedef struct {
enum gl_vendor_ids vendor_id;
@@ -250,51 +253,47 @@ typedef struct {
i32 max_3d_texture_dim;
i32 max_ssbo_size;
i32 max_ubo_size;
+ i32 max_server_wait_time;
} GLParams;
enum beamform_work {
- BW_FULL_COMPUTE,
- BW_RECOMPUTE,
- BW_PARTIAL_COMPUTE,
+ BW_COMPUTE,
+ BW_LOAD_RF_DATA,
+ BW_RELOAD_SHADER,
BW_SAVE_FRAME,
BW_SEND_FRAME,
- BW_SSBO_COPY,
};
typedef struct {
- u32 source_ssbo;
- u32 dest_ssbo;
-} BeamformSSBOCopy;
-
-typedef struct {
- BeamformFrame *frame;
- iptr export_handle;
- u32 raw_data_ssbo_index;
- b32 first_pass;
-} BeamformCompute;
+ void *beamformer_ctx;
+ s8 label;
+ s8 path;
+ u32 shader;
+ b32 needs_header;
+} ComputeShaderReloadContext;
typedef struct {
BeamformFrame *frame;
- iptr output_handle;
-} BeamformOutputFrame;
+ iptr file_handle;
+} BeamformOutputFrameContext;
/* NOTE: discriminated union based on type */
-typedef struct BeamformWork {
- struct BeamformWork *next;
+typedef struct {
union {
- BeamformSSBOCopy ssbo_copy_ctx;
- BeamformCompute compute_ctx;
- BeamformOutputFrame output_frame_ctx;
+ iptr file_handle;
+ BeamformFrame *frame;
+ BeamformOutputFrameContext output_frame_ctx;
+ ComputeShaderReloadContext *reload_shader_ctx;
};
u32 type;
} BeamformWork;
typedef struct {
- BeamformWork *first;
- BeamformWork *last;
- BeamformWork *next_free;
- i32 compute_in_flight;
- b32 did_compute_this_frame;
+ union {
+ u64 queue;
+ struct {u32 widx, ridx;};
+ };
+ BeamformWork work_items[1 << 6];
} BeamformWorkQueue;
typedef struct {
@@ -309,19 +308,23 @@ typedef struct BeamformerCtx {
GLParams gl;
uv2 window_size;
- u32 flags;
+ b32 start_compute;
+ b32 should_exit;
+
+ /* TODO(rnp): is there a better way of tracking this? */
+ b32 ready_for_rf;
Arena ui_backing_store;
BeamformerUI *ui;
BeamformFrame beamform_frames[MAX_BEAMFORMED_SAVED_FRAMES];
- u32 displayed_frame_index;
+ u32 next_render_frame_index;
+ u32 display_frame_index;
/* NOTE: this will only be used when we are averaging */
BeamformFrame averaged_frame;
ComputeShaderCtx csctx;
FragmentShaderCtx fsctx;
- PartialComputeCtx partial_compute_ctx;
Arena export_buffer;
@@ -329,7 +332,7 @@ typedef struct BeamformerCtx {
Platform platform;
Stream error_stream;
- BeamformWorkQueue beamform_work_queue;
+ BeamformWorkQueue *beamform_work_queue;
BeamformerParametersFull *params;
} BeamformerCtx;
@@ -340,4 +343,13 @@ typedef struct BeamformerCtx {
BeamformerInput *input)
typedef BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step_fn);
+#define BEAMFORMER_COMPLETE_COMPUTE_FN(name) void name(iptr user_context, Arena arena)
+typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn);
+
+#define BEAMFORM_WORK_QUEUE_PUSH_FN(name) BeamformWork *name(BeamformWorkQueue *q)
+typedef BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push_fn);
+
+#define BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(name) void name(BeamformWorkQueue *q)
+typedef BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit_fn);
+
#endif /*_BEAMFORMER_H_ */
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -29,6 +29,18 @@ enum compute_shaders {
#define MAX_BEAMFORMED_SAVED_FRAMES 16
/* NOTE: This struct follows the OpenGL std140 layout. DO NOT modify unless you have
* read and understood the rules, particulary with regards to _member alignment_ */
+
+typedef struct { /* subset of parameters grouped for the UI; NOTE(review): the std140 NOTE above now sits over this struct instead of BeamformerParameters */
+ v4 output_min_coordinate; /* [m] Back-Top-Left corner of output region (w ignored) */
+ v4 output_max_coordinate; /* [m] Front-Bottom-Right corner of output region (w ignored)*/
+ f32 sampling_frequency; /* [Hz] */
+ f32 center_frequency; /* [Hz] */
+ f32 speed_of_sound; /* [m/s] */
+ f32 off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */
+ i32 beamform_plane; /* Plane to Beamform in 2D HERCULES */
+ f32 f_number; /* F# (set to 0 to disable) */
+} BeamformerUIParameters; /* field order matches the "UI Parameters" run inside BeamformerParameters */
+
typedef struct {
u16 channel_mapping[256]; /* Transducer Channel to Verasonics Channel */
u16 uforces_channels[256]; /* Channels used for virtual UFORCES elements */
@@ -37,24 +49,33 @@ typedef struct {
f32 xdc_transform[16]; /* IMPORTANT: column major order */
uv4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
uv4 output_points; /* Width * Height * Depth * (Frame Average Count) */
- v4 output_min_coordinate; /* [m] Back-Top-Left corner of output region (w ignored) */
- v4 output_max_coordinate; /* [m] Front-Bottom-Right corner of output region (w ignored)*/
f32 xdc_element_pitch[2]; /* [m] Transducer Element Pitch {row, col} */
uv2 rf_raw_dim; /* Raw Data Dimensions */
i32 transmit_mode; /* Method/Orientation of Transmit */
u32 decode; /* Decode or just reshape data */
- f32 speed_of_sound; /* [m/s] */
+ u32 das_shader_id;
+ f32 time_offset; /* pulse length correction time [s] */
+
+ /* TODO(rnp): actually use a substruct but generate a header compatible with MATLAB */
+ /* UI Parameters */
+ v4 output_min_coordinate; /* [m] Back-Top-Left corner of output region (w ignored) */
+ v4 output_max_coordinate; /* [m] Front-Bottom-Right corner of output region (w ignored)*/
f32 sampling_frequency; /* [Hz] */
f32 center_frequency; /* [Hz] */
- f32 time_offset; /* pulse length correction time [s] */
+ f32 speed_of_sound; /* [m/s] */
f32 off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */
i32 beamform_plane; /* Plane to Beamform in 2D HERCULES */
f32 f_number; /* F# (set to 0 to disable) */
- u32 das_shader_id;
+
u32 readi_group_id; /* Which readi group this data is from */
u32 readi_group_size; /* Size of readi transmit group */
} BeamformerParameters;
+_Static_assert((offsetof(BeamformerParameters, output_min_coordinate) & 15) == 0,
+ "BeamformerParameters.output_min_coordinate must lie on a 16 byte boundary");
+_Static_assert((sizeof(BeamformerParameters) & 15) == 0,
+ "sizeof(BeamformerParameters) must be a multiple of 16");
+
/* NOTE: garbage to get the prepocessor to properly stringize the value of a macro */
#define str_(x) #x
#define str(x) str_(x)
@@ -70,20 +91,20 @@ layout(std140, binding = 0) uniform parameters {\n\
mat4 xdc_transform; /* IMPORTANT: column major order */\n\
uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */\n\
uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */\n\
- vec4 output_min_coord; /* [m] Top left corner of output region */\n\
- vec4 output_max_coord; /* [m] Bottom right corner of output region */\n\
vec2 xdc_element_pitch; /* [m] Transducer Element Pitch {row, col} */\n\
uvec2 rf_raw_dim; /* Raw Data Dimensions */\n\
int transmit_mode; /* Method/Orientation of Transmit */\n\
uint decode; /* Decode or just reshape data */\n\
- float speed_of_sound; /* [m/s] */\n\
+ uint das_shader_id;\n\
+ float time_offset; /* pulse length correction time [s] */\n\
+ vec4 output_min_coord; /* [m] Top left corner of output region */\n\
+ vec4 output_max_coord; /* [m] Bottom right corner of output region */\n\
float sampling_frequency; /* [Hz] */\n\
float center_frequency; /* [Hz] */\n\
- float time_offset; /* pulse length correction time [s] */\n\
+ float speed_of_sound; /* [m/s] */\n\
float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */\n\
int beamform_plane; /* Plane to Beamform in 2D HERCULES */\n\
float f_number; /* F# (set to 0 to disable) */\n\
- uint das_shader_id;\n\
uint readi_group_id; /* Which readi group this data is from */\n\
uint readi_group_size; /* Size of readi transmit group */\n\
};\n\
@@ -97,4 +118,9 @@ layout(std140, binding = 0) uniform parameters {\n\
#define DAS_ID_RCA_VLS " str(DAS_ID_RCA_VLS) "\n\
#define DAS_ID_RCA_TPW " str(DAS_ID_RCA_TPW) "\n\
\n\
-#line 0\n"
+#line 1\n"
+
+/* TODO(rnp): bake this into the das shader header */
+#define DAS_LOCAL_SIZE_X 32
+#define DAS_LOCAL_SIZE_Y 1
+#define DAS_LOCAL_SIZE_Z 32
diff --git a/build.sh b/build.sh
@@ -59,22 +59,22 @@ mkdir -p external/lib
build_raylib()
{
- cp external/raylib/src/raylib.h external/raylib/src/rlgl.h external/include/
+ cp external/raylib/src/rlgl.h external/include/
cppflags="${2} -DPLATFORM_DESKTOP_GLFW -DGRAPHICS_API_OPENGL_43"
cppflags="${cppflags} -Iexternal/raylib/src -Iexternal/raylib/src/external/glfw/include"
case ${1} in
shared)
${cc} ${cflags} ${cppflags} -fPIC -shared -DBUILD_LIBTYPE_SHARED \
- external/raylib/src/raudio.c external/raylib/src/rcore.c \
- external/raylib/src/rmodels.c external/raylib/src/rshapes.c \
- external/raylib/src/rtext.c external/raylib/src/rtextures.c \
- external/raylib/src/utils.c \
+ external/rcore_extended.c \
+ external/raylib/src/raudio.c external/raylib/src/rmodels.c \
+ external/raylib/src/rshapes.c external/raylib/src/rtext.c \
+ external/raylib/src/rtextures.c external/raylib/src/utils.c \
-o ${raylib}
;;
static)
+ ${cc} ${cflags} ${cppflags} -c external/rcore_extended.c -o external/lib/rcore.c.o
${cc} ${cflags} ${cppflags} -c external/raylib/src/raudio.c -o external/lib/raudio.c.o
- ${cc} ${cflags} ${cppflags} -c external/raylib/src/rcore.c -o external/lib/rcore.c.o
${cc} ${cflags} ${cppflags} -c external/raylib/src/rmodels.c -o external/lib/rmodels.c.o
${cc} ${cflags} ${cppflags} -c external/raylib/src/rshapes.c -o external/lib/rshapes.c.o
${cc} ${cflags} ${cppflags} -c external/raylib/src/rtext.c -o external/lib/rtext.c.o
diff --git a/external/include/raylib_extended.h b/external/include/raylib_extended.h
@@ -0,0 +1,2 @@
+#include "../raylib/src/raylib.h"
+RLAPI void *GetPlatformWindowHandle(void);
diff --git a/external/rcore_extended.c b/external/rcore_extended.c
@@ -0,0 +1,8 @@
+/* NOTE(rnp): hacky stuff to work around broken raylib garbage */
+#include <raylib_extended.h>
+#include "raylib/src/rcore.c"
+
+void *GetPlatformWindowHandle(void) /* accessor added on top of the rcore.c included above */
+{
+ return (void *)platform.handle; /* `platform` comes from the included rcore.c; presumably the native window handle - confirm against raylib */
+}
diff --git a/intrinsics.c b/intrinsics.c
@@ -4,6 +4,11 @@
#define sqrt_f32(a) __builtin_sqrtf(a)
#define atan2_f32(y, x) __builtin_atan2f(y, x)
+#define atomic_store(ptr, n) __atomic_store_n(ptr, n, __ATOMIC_RELEASE) /* store n to *ptr with release ordering */
+#define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) /* read *ptr with acquire ordering; pairs with the release operations here */
+#define atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE) /* *ptr &= n; evaluates to the NEW value */
+#define atomic_add(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_RELEASE) /* *ptr += n; evaluates to the NEW value */
+
static FORCE_INLINE u32
clz_u32(u32 a)
{
diff --git a/main_linux.c b/main_linux.c
@@ -21,8 +21,9 @@
#include "static.c"
static void
-dispatch_file_watch_events(FileWatchContext *fwctx, Arena arena)
+dispatch_file_watch_events(Platform *platform, Arena arena)
{
+ FileWatchContext *fwctx = &platform->file_watch_context;
u8 *mem = alloc_(&arena, 4096, 64, 1);
Stream path = stream_alloc(&arena, 256);
struct inotify_event *event;
@@ -46,7 +47,7 @@ dispatch_file_watch_events(FileWatchContext *fwctx, Arena arena)
stream_append_s8(&path, file);
stream_append_byte(&path, 0);
path.widx--;
- fw->callback(stream_to_s8(&path),
+ fw->callback(platform, stream_to_s8(&path),
fw->user_data, arena);
path.widx = 0;
break;
@@ -62,10 +63,11 @@ main(void)
{
BeamformerCtx ctx = {0};
BeamformerInput input = {.executable_reloaded = 1};
- Arena temp_memory = os_alloc_arena((Arena){0}, 16 * MEGABYTE);
- ctx.error_stream = stream_alloc(&temp_memory, 1 * MEGABYTE);
+ Arena temp_memory = os_alloc_arena((Arena){0}, MB(16));
+ ctx.error_stream = stream_alloc(&temp_memory, MB(1));
- ctx.ui_backing_store = sub_arena(&temp_memory, 2 * MEGABYTE, 4096);
+ ctx.ui_backing_store = sub_arena(&temp_memory, MB(2), KB(4));
+ ctx.platform.compute_worker.arena = sub_arena(&temp_memory, MB(2), KB(4));
Pipe data_pipe = os_open_named_pipe(OS_PIPE_NAME);
input.pipe_handle = data_pipe.file;
@@ -76,9 +78,12 @@ main(void)
#undef X
ctx.platform.file_watch_context.handle = inotify_init1(O_NONBLOCK|O_CLOEXEC);
+ ctx.platform.compute_worker.asleep = 1;
+ ctx.platform.error_file_handle = STDERR_FILENO;
- setup_beamformer(&ctx, &temp_memory);
debug_init(&ctx.platform, (iptr)&input, &temp_memory);
+ setup_beamformer(&ctx, &temp_memory);
+ os_wake_thread(ctx.platform.compute_worker.sync_handle);
struct pollfd fds[2] = {{0}, {0}};
fds[0].fd = ctx.platform.file_watch_context.handle;
@@ -86,10 +91,10 @@ main(void)
fds[1].fd = data_pipe.file;
fds[1].events = POLLIN;
- while (!(ctx.flags & SHOULD_EXIT)) {
+ while (!ctx.should_exit) {
poll(fds, 2, 0);
if (fds[0].revents & POLLIN)
- dispatch_file_watch_events(&ctx.platform.file_watch_context, temp_memory);
+ dispatch_file_watch_events(&ctx.platform, temp_memory);
input.pipe_data_available = !!(fds[1].revents & POLLIN);
input.last_mouse = input.mouse;
diff --git a/main_w32.c b/main_w32.c
@@ -46,7 +46,7 @@ w32_wide_char_to_mb(Stream *s, u16 *wstr, u32 wide_char_length)
}
static void
-dispatch_file_watch(FileWatchDirectory *fw_dir, u8 *buf, Arena arena)
+dispatch_file_watch(Platform *platform, FileWatchDirectory *fw_dir, u8 *buf, Arena arena)
{
i64 offset = 0;
Stream path = stream_alloc(&arena, 256);
@@ -73,8 +73,7 @@ dispatch_file_watch(FileWatchDirectory *fw_dir, u8 *buf, Arena arena)
for (u32 i = 0; i < fw_dir->file_watch_count; i++) {
FileWatch *fw = fw_dir->file_watches + i;
if (fw->hash == hash) {
- fw->callback(stream_to_s8(&path),
- fw->user_data, arena);
+ fw->callback(platform, stream_to_s8(&path), fw->user_data, arena);
break;
}
}
@@ -98,7 +97,7 @@ clear_io_queue(Platform *platform, BeamformerInput *input, Arena arena)
switch (event->tag) {
case W32_IO_FILE_WATCH: {
FileWatchDirectory *dir = (FileWatchDirectory *)event->context;
- dispatch_file_watch(dir, dir->buffer.beg, arena);
+ dispatch_file_watch(platform, dir, dir->buffer.beg, arena);
zero_struct(overlapped);
ReadDirectoryChangesW(dir->handle, dir->buffer.beg, 4096, 0,
FILE_NOTIFY_CHANGE_LAST_WRITE, 0, overlapped, 0);
@@ -143,10 +142,11 @@ main(void)
{
BeamformerCtx ctx = {0};
BeamformerInput input = {.executable_reloaded = 1};
- Arena temp_memory = os_alloc_arena((Arena){0}, 16 * MEGABYTE);
- ctx.error_stream = stream_alloc(&temp_memory, 1 * MEGABYTE);
+ Arena temp_memory = os_alloc_arena((Arena){0}, MB(16));
+ ctx.error_stream = stream_alloc(&temp_memory, MB(1));
- ctx.ui_backing_store = sub_arena(&temp_memory, 2 * MEGABYTE, 4096);
+ ctx.ui_backing_store = sub_arena(&temp_memory, MB(2), KB(4));
+ ctx.platform.compute_worker.arena = sub_arena(&temp_memory, MB(2), KB(4));
Pipe data_pipe = os_open_named_pipe(OS_PIPE_NAME);
input.pipe_handle = data_pipe.file;
@@ -158,12 +158,16 @@ main(void)
w32_context w32_ctx = {0};
w32_ctx.io_completion_handle = CreateIoCompletionPort(INVALID_FILE, 0, 0, 0);
- ctx.platform.os_context = (iptr)&w32_ctx;
- setup_beamformer(&ctx, &temp_memory);
+ ctx.platform.os_context = (iptr)&w32_ctx;
+ ctx.platform.compute_worker.asleep = 1;
+ ctx.platform.error_file_handle = GetStdHandle(STD_ERROR_HANDLE);
+
debug_init(&ctx.platform, (iptr)&input, &temp_memory);
+ setup_beamformer(&ctx, &temp_memory);
+ os_wake_thread(ctx.platform.compute_worker.sync_handle);
- while (!(ctx.flags & SHOULD_EXIT)) {
+ while (!ctx.should_exit) {
clear_io_queue(&ctx.platform, &input, temp_memory);
input.last_mouse = input.mouse;
diff --git a/os_unix.c b/os_unix.c
@@ -8,6 +8,8 @@
#include <dlfcn.h>
#include <fcntl.h>
#include <poll.h>
+#include <pthread.h>
+#include <semaphore.h>
#include <sys/inotify.h>
#include <sys/mman.h>
#include <sys/stat.h>
@@ -71,20 +73,19 @@ static PLATFORM_OPEN_FOR_WRITE_FN(os_open_for_write)
return result;
}
-static s8
-os_read_file(Arena *a, char *file, size filesize)
+static PLATFORM_READ_WHOLE_FILE_FN(os_read_whole_file)
{
s8 result = {0};
+ struct stat sb;
i32 fd = open(file, O_RDONLY);
- if (fd >= 0) {
- result = s8alloc(a, filesize);
+ if (fd >= 0 && fstat(fd, &sb) >= 0) {
+ result = s8alloc(arena, sb.st_size);
size rlen = read(fd, result.data, result.len);
- if (rlen != result.len) {
+ if (rlen != result.len)
result = (s8){0};
- }
- close(fd);
}
+ if (fd >= 0) close(fd);
return result;
}
@@ -99,19 +100,12 @@ static PLATFORM_WRITE_NEW_FILE_FN(os_write_new_file)
return ret;
}
-static FileStats
-os_get_file_stats(char *fname)
+static b32
+os_file_exists(char *path)
{
struct stat st;
-
- if (stat(fname, &st) < 0) {
- return ERROR_FILE_STATS;
- }
-
- return (FileStats){
- .filesize = st.st_size,
- .timestamp = (f64)st.st_mtim.tv_sec + (f64)st.st_mtim.tv_nsec * 1e-9,
- };
+ b32 result = stat(path, &st) == 0;
+ return result;
}
static Pipe
@@ -121,13 +115,13 @@ os_open_named_pipe(char *name)
return (Pipe){.file = open(name, O_RDONLY|O_NONBLOCK), .name = name};
}
-static PLATFORM_READ_PIPE_FN(os_read_pipe)
+static PLATFORM_READ_FILE_FN(os_read_file)
{
size r = 0, total_read = 0;
do {
if (r != -1)
total_read += r;
- r = read(pipe, buf + total_read, len - total_read);
+ r = read(file, buf + total_read, len - total_read);
} while (r);
return total_read;
}
@@ -255,3 +249,32 @@ static PLATFORM_ADD_FILE_WATCH_FN(os_add_file_watch)
insert_file_watch(dir, s8_cut_head(path, dir->name.len + 1), user_data, callback);
}
+
+i32 pthread_setname_np(pthread_t, char *);
+static iptr /* starts a thread running fn(user_context), names it, and returns its handle; returns 0 if creation failed */
+os_create_thread(iptr user_context, char *name, platform_thread_entry_point_fn *fn)
+{
+ pthread_t result = 0; /* initialised so a failed create never leaks an indeterminate handle */
+ if (pthread_create(&result, 0, (void *(*)(void *))fn, (void *)user_context) != 0) return 0; /* *thread is undefined on failure */
+ pthread_setname_np(result, name); /* best effort: naming failure is ignored */
+ return (iptr)result;
+}
+
+static iptr /* allocates a semaphore from `arena` and returns its address as an opaque sync handle */
+os_create_sync_object(Arena *arena)
+{
+ sem_t *result = push_struct(arena, sem_t); /* storage lives in the arena, so the handle is only valid while the arena is */
+ sem_init(result, 0, 0); /* pshared = 0 (threads of this process), initial count 0; return value is not checked */
+ return (iptr)result;
+}
+
+static void /* blocks the calling thread on the semaphore behind sync_handle */
+os_sleep_thread(iptr sync_handle)
+{
+ sem_wait((sem_t *)sync_handle); /* result ignored; NOTE(review): an EINTR return would look like a wake - confirm callers re-check for work */
+}
+
+static PLATFORM_WAKE_THREAD_FN(os_wake_thread) /* counterpart of os_sleep_thread: releases one waiter on sync_handle */
+{
+ sem_post((sem_t *)sync_handle); /* increments the semaphore once; result ignored */
+}
diff --git a/os_win32.c b/os_win32.c
@@ -34,6 +34,8 @@
#define ERROR_PIPE_NOT_CONNECTED 233L
#define ERROR_PIPE_LISTENING 536L
+#define THREAD_SET_LIMITED_INFORMATION 0x0400
+
typedef struct {
u16 wProcessorArchitecture;
u16 _pad1;
@@ -88,10 +90,13 @@ W32(iptr) CreateFileA(c8 *, u32, u32, void *, u32, u32, void *);
W32(iptr) CreateFileMappingA(iptr, void *, u32, u32, u32, c8 *);
W32(iptr) CreateIoCompletionPort(iptr, iptr, uptr, u32);
W32(iptr) CreateNamedPipeA(c8 *, u32, u32, u32, u32, u32, u32, void *);
+W32(iptr) CreateSemaphoreA(iptr, i64, i64, c8 *);
+W32(iptr) CreateThread(iptr, usize, iptr, iptr, u32, u32 *);
W32(b32) DeleteFileA(c8 *);
W32(b32) DisconnectNamedPipe(iptr);
W32(void) ExitProcess(i32);
W32(b32) FreeLibrary(void *);
+W32(i32) GetFileAttributesA(c8 *);
W32(b32) GetFileInformationByHandle(iptr, w32_file_info *);
W32(i32) GetLastError(void);
W32(void *) GetProcAddress(void *, c8 *);
@@ -102,12 +107,13 @@ W32(void *) LoadLibraryA(c8 *);
W32(void *) MapViewOfFile(iptr, u32, u32, u32, u64);
W32(b32) ReadDirectoryChangesW(iptr, u8 *, u32, b32, u32, u32 *, void *, void *);
W32(b32) ReadFile(iptr, u8 *, i32, i32 *, void *);
+W32(b32) ReleaseSemaphore(iptr, i64, i64 *);
+W32(i32) SetThreadDescription(iptr, u16 *);
+W32(u32) WaitForSingleObjectEx(iptr, u32, b32);
W32(b32) WriteFile(iptr, u8 *, i32, i32 *, void *);
W32(void *) VirtualAlloc(u8 *, size, u32, u32);
W32(b32) VirtualFree(u8 *, size, u32);
-static iptr win32_stderr_handle;
-
static PLATFORM_WRITE_FILE_FN(os_write_file)
{
i32 wlen;
@@ -115,6 +121,8 @@ static PLATFORM_WRITE_FILE_FN(os_write_file)
return raw.len == wlen;
}
+/* TODO(rnp): clean up callers of this function; they should route through the error file handle instead */
+static iptr win32_stderr_handle;
static void
os_write_err_msg(s8 msg)
{
@@ -165,26 +173,35 @@ static PLATFORM_OPEN_FOR_WRITE_FN(os_open_for_write)
return result;
}
-static s8
-os_read_file(Arena *a, char *file, size filesize)
+static PLATFORM_READ_WHOLE_FILE_FN(os_read_whole_file)
{
s8 result = {0};
- if (filesize > 0 && filesize <= (size)U32_MAX) {
- result = s8alloc(a, filesize);
- iptr h = CreateFileA(file, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0);
- if (h >= 0) {
- i32 rlen;
- if (!ReadFile(h, result.data, result.len, &rlen, 0) || rlen != result.len) {
- result = (s8){0};
- }
- CloseHandle(h);
- }
+ w32_file_info fileinfo;
+ iptr h = CreateFileA(file, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0);
+ if (h >= 0 && GetFileInformationByHandle(h, &fileinfo)) {
+ size filesize = (size)fileinfo.nFileSizeHigh << 32;
+ filesize |= (size)fileinfo.nFileSizeLow;
+ result = s8alloc(arena, filesize);
+
+ ASSERT(filesize <= (size)U32_MAX);
+
+ i32 rlen;
+ if (!ReadFile(h, result.data, result.len, &rlen, 0) || rlen != result.len)
+ result = (s8){0};
}
+ if (h >= 0) CloseHandle(h);
return result;
}
+static PLATFORM_READ_FILE_FN(os_read_file) /* single ReadFile of up to len bytes from file into buf; returns the byte count read */
+{
+ i32 total_read = 0; /* stays 0 if ReadFile does not update it */
+ ReadFile(file, buf, len, &total_read, 0); /* success flag ignored; last argument 0 = no OVERLAPPED structure */
+ return total_read;
+}
+
static PLATFORM_WRITE_NEW_FILE_FN(os_write_new_file)
{
if (raw.len > (size)U32_MAX) {
@@ -202,42 +219,21 @@ static PLATFORM_WRITE_NEW_FILE_FN(os_write_new_file)
return ret;
}
-static FileStats
-os_get_file_stats(char *fname)
+static b32
+os_file_exists(char *path)
{
- iptr h = CreateFileA(fname, 0, 0, 0, OPEN_EXISTING, 0, 0);
- if (h == INVALID_FILE)
- return ERROR_FILE_STATS;
-
- w32_file_info fileinfo;
- if (!GetFileInformationByHandle(h, &fileinfo)) {
- os_write_err_msg(s8("os_get_file_stats: couldn't get file info\n"));
- CloseHandle(h);
- return ERROR_FILE_STATS;
- }
- CloseHandle(h);
-
- size filesize = (size)fileinfo.nFileSizeHigh << 32;
- filesize |= (size)fileinfo.nFileSizeLow;
-
- return (FileStats){.filesize = filesize, .timestamp = fileinfo.ftLastWriteTime};
+ b32 result = GetFileAttributesA(path) != -1;
+ return result;
}
static Pipe
os_open_named_pipe(char *name)
{
iptr h = CreateNamedPipeA(name, PIPE_ACCESS_INBOUND, PIPE_TYPE_BYTE|PIPE_NOWAIT, 1,
- 0, 1 * MEGABYTE, 0, 0);
+ 0, MB(1), 0, 0);
return (Pipe){.file = h, .name = name};
}
-static PLATFORM_READ_PIPE_FN(os_read_pipe)
-{
- i32 total_read = 0;
- ReadFile(pipe, buf, len, &total_read, 0);
- return total_read;
-}
-
static void *
os_open_shared_memory_area(char *name, size cap)
{
@@ -334,3 +330,30 @@ static PLATFORM_ADD_FILE_WATCH_FN(os_add_file_watch)
insert_file_watch(dir, s8_cut_head(path, dir->name.len + 1), user_data, callback);
}
+
+static iptr /* starts a thread running fn(user_context) and returns the CreateThread handle unchanged */
+os_create_thread(iptr user_context, char *name, platform_thread_entry_point_fn *fn)
+{
+ iptr result = CreateThread(0, 0, (iptr)fn, user_context, 0, 0); /* default security and stack size, runs immediately, thread id not requested */
+ /* TODO(rnp): name needs to be utf16 encoded */
+ //SetThreadDescription(result, s8_to_16(arena, name).data);
+ return result; /* `name` is currently unused because the call above is commented out */
+}
+
+static iptr /* creates an unnamed semaphore and returns its handle; `arena` is not used here */
+os_create_sync_object(Arena *arena)
+{
+ iptr result = CreateSemaphoreA(0, 0, 1, 0); /* initial count 0, maximum count 1; NOTE(review): extra wakes beyond 1 are dropped, unlike a POSIX semaphore - confirm intended */
+ return result;
+}
+
+static void /* blocks the calling thread until the object behind sync_handle is signalled */
+os_sleep_thread(iptr sync_handle)
+{
+ WaitForSingleObjectEx(sync_handle, 0xFFFFFFFF, 0); /* 0xFFFFFFFF = wait without timeout, 0 = non-alertable; result ignored */
+}
+
+static PLATFORM_WAKE_THREAD_FN(os_wake_thread) /* counterpart of os_sleep_thread: releases one waiter on sync_handle */
+{
+ ReleaseSemaphore(sync_handle, 1, 0); /* raise the count by 1; previous count not requested; result ignored */
+}
diff --git a/shaders/das.glsl b/shaders/das.glsl
@@ -7,9 +7,8 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 {
layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex;
-layout(location = 2) uniform int u_volume_export_pass;
-layout(location = 3) uniform ivec3 u_volume_export_dim_offset;
-layout(location = 4) uniform float u_cycle_t;
+layout(location = 2) uniform ivec3 u_voxel_offset;
+layout(location = 3) uniform float u_cycle_t;
#define C_SPLINE 0.5
@@ -66,8 +65,7 @@ vec3 calc_image_point(vec3 voxel)
case DAS_ID_HERCULES:
case DAS_ID_RCA_TPW:
case DAS_ID_RCA_VLS:
- if (u_volume_export_pass == 0)
- image_point.y = off_axis_pos;
+ image_point.y = off_axis_pos;
break;
}
@@ -112,7 +110,7 @@ float cylindricalwave_transmit_distance(vec3 point, float focal_depth, float tra
vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg)
{
uint ridx = 0;
- int direction = beamform_plane * (u_volume_export_pass ^ 1);
+ int direction = beamform_plane;
if (direction != TX_ROWS) image_point = image_point.yxz;
bool tx_col = TX_MODE_TX_COLS(transmit_mode);
@@ -158,7 +156,7 @@ vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg)
vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg)
{
uint ridx = 0;
- int direction = beamform_plane * (u_volume_export_pass ^ 1);
+ int direction = beamform_plane;
if (direction != TX_ROWS) image_point = image_point.yxz;
bool tx_col = TX_MODE_TX_COLS(transmit_mode);
@@ -237,9 +235,8 @@ vec2 uFORCES(vec3 image_point, vec3 delta, float apodization_arg)
void main()
{
/* NOTE: Convert voxel to physical coordinates */
- ivec3 out_coord = ivec3(gl_GlobalInvocationID) + u_volume_export_dim_offset;
- vec3 image_point = calc_image_point(vec3(gl_GlobalInvocationID)
- + vec3(u_volume_export_dim_offset));
+ ivec3 out_coord = ivec3(gl_GlobalInvocationID) + u_voxel_offset;
+ vec3 image_point = calc_image_point(vec3(out_coord));
/* NOTE: used for constant F# dynamic receive apodization. This is implemented as:
*
diff --git a/static.c b/static.c
@@ -7,16 +7,32 @@
#else
static void *debug_lib;
-static beamformer_frame_step_fn *beamformer_frame_step;
+#define DEBUG_ENTRY_POINTS \
+ X(beamformer_frame_step) \
+ X(beamformer_complete_compute) \
+ X(beamform_work_queue_push) \
+ X(beamform_work_queue_push_commit)
+
+#define X(name) static name ##_fn *name;
+DEBUG_ENTRY_POINTS
+#undef X
static FILE_WATCH_CALLBACK_FN(debug_reload)
{
BeamformerInput *input = (BeamformerInput *)user_data;
Stream err = arena_stream(&tmp);
+ /* NOTE(rnp): spin until compute thread finishes its work (we will probably
+ * never reload while compute is in progress but just in case). */
+ while (!atomic_load(&platform->compute_worker.asleep));
+
os_unload_library(debug_lib);
debug_lib = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, &err);
- beamformer_frame_step = os_lookup_dynamic_symbol(debug_lib, "beamformer_frame_step", &err);
+
+ #define X(name) name = os_lookup_dynamic_symbol(debug_lib, #name, &err);
+ DEBUG_ENTRY_POINTS
+ #undef X
+
os_write_err_msg(s8("Reloaded Main Executable\n"));
input->executable_reloaded = 1;
@@ -27,7 +43,7 @@ static void
debug_init(Platform *p, iptr input, Arena *arena)
{
p->add_file_watch(p, arena, s8(OS_DEBUG_LIB_NAME), debug_reload, input);
- debug_reload((s8){0}, input, *arena);
+ debug_reload(p, (s8){0}, input, *arena);
}
#endif /* _DEBUG */
@@ -85,6 +101,7 @@ get_gl_params(GLParams *gl, Stream *err)
glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &gl->max_3d_texture_dim);
glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &gl->max_ssbo_size);
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &gl->max_ubo_size);
+ glGetIntegerv(GL_MAX_SERVER_WAIT_TIMEOUT, &gl->max_server_wait_time);
}
static void
@@ -132,49 +149,20 @@ dump_gl_params(GLParams *gl, Arena a)
stream_append_i64(&s, gl->version_minor);
stream_append_s8(&s, s8("\nMax 1D/2D Texture Dimension: "));
stream_append_i64(&s, gl->max_2d_texture_dim);
- stream_append_s8(&s, s8("\nMax 3D Texture Dimension: "));
+ stream_append_s8(&s, s8("\nMax 3D Texture Dimension: "));
stream_append_i64(&s, gl->max_3d_texture_dim);
- stream_append_s8(&s, s8("\nMax SSBO Size: "));
+ stream_append_s8(&s, s8("\nMax SSBO Size: "));
stream_append_i64(&s, gl->max_ssbo_size);
- stream_append_s8(&s, s8("\nMax UBO Size: "));
+ stream_append_s8(&s, s8("\nMax UBO Size: "));
stream_append_i64(&s, gl->max_ubo_size);
+ stream_append_s8(&s, s8("\nMax Server Wait Time [ns]: "));
+ stream_append_i64(&s, gl->max_server_wait_time);
stream_append_s8(&s, s8("\n-----------------------\n"));
if (!s.errors)
os_write_err_msg(stream_to_s8(&s));
#endif
}
-static u32
-compile_shader(Arena a, u32 type, s8 shader)
-{
- u32 sid = glCreateShader(type);
- glShaderSource(sid, 1, (const char **)&shader.data, (int *)&shader.len);
- glCompileShader(sid);
-
- i32 res = 0;
- glGetShaderiv(sid, GL_COMPILE_STATUS, &res);
-
- if (res == GL_FALSE) {
- char *stype;
- switch (type) {
- case GL_COMPUTE_SHADER: stype = "Compute"; break;
- case GL_FRAGMENT_SHADER: stype = "Fragment"; break;
- }
-
- TraceLog(LOG_WARNING, "SHADER: [ID %u] %s shader failed to compile", sid, stype);
- i32 len = 0;
- glGetShaderiv(sid, GL_INFO_LOG_LENGTH, &len);
- s8 err = s8alloc(&a, len);
- glGetShaderInfoLog(sid, len, (int *)&err.len, (char *)err.data);
- TraceLog(LOG_WARNING, "SHADER: [ID %u] Compile error: %s", sid, (char *)err.data);
- glDeleteShader(sid);
-
- sid = 0;
- }
-
- return sid;
-}
-
static FILE_WATCH_CALLBACK_FN(reload_render_shader)
{
FragmentShaderCtx *ctx = (FragmentShaderCtx *)user_data;
@@ -192,61 +180,40 @@ static FILE_WATCH_CALLBACK_FN(reload_render_shader)
return 1;
}
-struct compute_shader_reload_ctx {
- BeamformerCtx *ctx;
- s8 label;
- u32 shader;
- b32 needs_header;
-};
-static FILE_WATCH_CALLBACK_FN(reload_compute_shader)
+static FILE_WATCH_CALLBACK_FN(queue_compute_shader_reload) /* file watch callback: queues a BW_RELOAD_SHADER work item and wakes the compute worker */
{
- struct compute_shader_reload_ctx *ctx = (struct compute_shader_reload_ctx *)user_data;
- ComputeShaderCtx *cs = &ctx->ctx->csctx;
-
- b32 result = 1;
-
- /* NOTE: arena works as stack (since everything here is 1 byte aligned) */
- s8 header_in_arena = {.data = tmp.beg};
- if (ctx->needs_header)
- header_in_arena = push_s8(&tmp, s8(COMPUTE_SHADER_HEADER));
-
- size fs = os_get_file_stats((c8 *)path.data).filesize;
-
- s8 shader_text = os_read_file(&tmp, (c8 *)path.data, fs);
- shader_text.data -= header_in_arena.len;
- shader_text.len += header_in_arena.len;
-
- if (shader_text.data == header_in_arena.data) {
- u32 shader_id = compile_shader(tmp, GL_COMPUTE_SHADER, shader_text);
- if (shader_id) {
- glDeleteProgram(cs->programs[ctx->shader]);
- cs->programs[ctx->shader] = rlLoadComputeShaderProgram(shader_id);
- glUseProgram(cs->programs[ctx->shader]);
- glBindBufferBase(GL_UNIFORM_BUFFER, 0, cs->shared_ubo);
- LABEL_GL_OBJECT(GL_PROGRAM, cs->programs[ctx->shader], ctx->label);
-
- TraceLog(LOG_INFO, "%s loaded", path.data);
-
- ctx->ctx->flags |= START_COMPUTE;
- } else {
- result = 0;
+ ComputeShaderReloadContext *csr = (typeof(csr))user_data;
+ BeamformerCtx *ctx = csr->beamformer_ctx;
+ BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue);
+ if (work) { /* NOTE(review): presumably null when the queue has no free slot - confirm; the reload is then dropped */
+ work->type = BW_RELOAD_SHADER;
+ work->reload_shader_ctx = csr;
+ beamform_work_queue_push_commit(ctx->beamform_work_queue);
+ if (ctx->platform.compute_worker.asleep && /* worker idle and the displayed frame is ready: also queue a BW_COMPUTE */
+ ctx->beamform_frames[ctx->display_frame_index].ready_to_present)
+ {
+ BeamformWork *compute = beamform_work_queue_push(ctx->beamform_work_queue);
+ if (compute) {
+ compute->type = BW_COMPUTE;
+ compute->frame = ctx->beamform_frames + ctx->next_render_frame_index++;
+ compute->frame->ready_to_present = 0;
+ if (ctx->next_render_frame_index >= ARRAY_COUNT(ctx->beamform_frames)) /* wrap the frame index */
+ ctx->next_render_frame_index = 0;
+ beamform_work_queue_push_commit(ctx->beamform_work_queue);
+ }
}
- glDeleteShader(shader_id);
- } else {
- TraceLog(LOG_INFO, "shader failed to load: %s", path.data);
+ ctx->platform.wake_thread(ctx->platform.compute_worker.sync_handle);
}
-
- return result;
+ return 1; /* always 1, even when nothing could be queued */
}
static FILE_WATCH_CALLBACK_FN(load_cuda_lib)
{
CudaLib *cl = (CudaLib *)user_data;
- b32 result = 0;
- size fs = os_get_file_stats((c8 *)path.data).filesize;
- if (fs > 0) {
- TraceLog(LOG_INFO, "Loading CUDA lib: %s", OS_CUDA_LIB_NAME);
+ b32 result = os_file_exists((c8 *)path.data);
+ if (result) {
+ os_write_err_msg(s8("loading CUDA lib: " OS_CUDA_LIB_NAME "\n"));
Stream err = arena_stream(&tmp);
os_unload_library(cl->lib);
@@ -254,8 +221,6 @@ static FILE_WATCH_CALLBACK_FN(load_cuda_lib)
#define X(name) cl->name = os_lookup_dynamic_symbol(cl->lib, #name, &err);
CUDA_LIB_FNS
#undef X
-
- result = 1;
}
#define X(name) if (!cl->name) cl->name = name ## _stub;
@@ -265,6 +230,31 @@ static FILE_WATCH_CALLBACK_FN(load_cuda_lib)
return result;
}
+
+#define GLFW_VISIBLE 0x00020004 /* window hint id; NOTE(review): presumably mirrors the value in glfw3.h - confirm */
+void glfwWindowHint(i32, i32); /* hand-written prototypes for the GLFW functions called in this file */
+iptr glfwCreateWindow(i32, i32, char *, iptr, iptr);
+void glfwMakeContextCurrent(iptr);
+
+#include <stdio.h>
+static PLATFORM_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point) /* compute thread main: sleep until woken, then call beamformer_complete_compute; never returns */
+{
+ GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx;
+
+ glfwMakeContextCurrent(ctx->window_handle); /* make the worker window's GL context current on this thread */
+
+ for (;;) {
+ ctx->asleep = 1; /* read by queue_compute_shader_reload; NOTE(review): plain b32, no atomics or barriers visible here */
+ os_sleep_thread(ctx->sync_handle);
+ ctx->asleep = 0;
+ beamformer_complete_compute(ctx->user_context, ctx->arena);
+ }
+
+ unreachable(); /* the loop above has no exit */
+
+ return 0;
+}
+
static void
setup_beamformer(BeamformerCtx *ctx, Arena *memory)
{
@@ -281,6 +271,20 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory)
dump_gl_params(&ctx->gl, *memory);
validate_gl_requirements(&ctx->gl);
+ glfwWindowHint(GLFW_VISIBLE, 0);
+ iptr raylib_window_handle = (iptr)GetPlatformWindowHandle();
+ GLWorkerThreadContext *worker = &ctx->platform.compute_worker;
+ worker->window_handle = glfwCreateWindow(320, 240, "", 0, raylib_window_handle);
+ worker->sync_handle = os_create_sync_object(memory);
+ worker->handle = os_create_thread((iptr)worker, "[compute]",
+ compute_worker_thread_entry_point);
+ /* TODO(rnp): we should lock this down after we have something working */
+ worker->user_context = (iptr)ctx;
+
+ glfwMakeContextCurrent(raylib_window_handle);
+
+ ctx->beamform_work_queue = push_struct(memory, BeamformWorkQueue);
+
ctx->fsctx.db = -50.0f;
ctx->fsctx.threshold = 40.0f;
@@ -295,7 +299,7 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory)
ctx->params->compute_stages_count = 3;
if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA
- && load_cuda_lib(s8(OS_CUDA_LIB_NAME), (iptr)&ctx->cuda_lib, *memory))
+ && load_cuda_lib(&ctx->platform, s8(OS_CUDA_LIB_NAME), (iptr)&ctx->cuda_lib, *memory))
{
os_add_file_watch(&ctx->platform, memory, s8(OS_CUDA_LIB_NAME), load_cuda_lib,
(iptr)&ctx->cuda_lib);
@@ -316,30 +320,24 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory)
glNamedBufferStorage(ctx->csctx.shared_ubo, sizeof(BeamformerParameters), 0, GL_DYNAMIC_STORAGE_BIT);
LABEL_GL_OBJECT(GL_BUFFER, ctx->csctx.shared_ubo, s8("Beamformer_Parameters"));
- glGenQueries(ARRAY_COUNT(ctx->csctx.timer_fences) * CS_LAST, (u32 *)ctx->csctx.timer_ids);
- glGenQueries(ARRAY_COUNT(ctx->partial_compute_ctx.timer_ids), ctx->partial_compute_ctx.timer_ids);
-
- #define X(e, sn, f, nh, pretty_name) do if (s8(f).len > 0) { \
- struct compute_shader_reload_ctx *csr = push_struct(memory, typeof(*csr)); \
- csr->ctx = ctx; \
- csr->label = s8("CS_" #e); \
- csr->shader = sn; \
- csr->needs_header = nh; \
- s8 shader = s8(static_path_join("shaders", f ".glsl")); \
- reload_compute_shader(shader, (iptr)csr, *memory); \
- os_add_file_watch(&ctx->platform, memory, shader, reload_compute_shader, (iptr)csr); \
+ #define X(e, sn, f, nh, pretty_name) do if (s8(f).len > 0) { \
+ ComputeShaderReloadContext *csr = push_struct(memory, typeof(*csr)); \
+ csr->beamformer_ctx = ctx; \
+ csr->label = s8("CS_" #e); \
+ csr->shader = sn; \
+ csr->needs_header = nh; \
+ csr->path = s8(static_path_join("shaders", f ".glsl")); \
+ os_add_file_watch(&ctx->platform, memory, csr->path, queue_compute_shader_reload, (iptr)csr); \
+ queue_compute_shader_reload(&ctx->platform, csr->path, (iptr)csr, *memory); \
} while (0);
COMPUTE_SHADERS
#undef X
+ os_wake_thread(worker->sync_handle);
s8 render = s8(static_path_join("shaders", "render.glsl"));
- reload_render_shader(render, (iptr)&ctx->fsctx, *memory);
+ reload_render_shader(&ctx->platform, render, (iptr)&ctx->fsctx, *memory);
os_add_file_watch(&ctx->platform, memory, render, reload_render_shader, (iptr)&ctx->fsctx);
ctx->fsctx.gen_mipmaps = 0;
- /* TODO(rnp): remove this */
- ComputeShaderCtx *csctx = &ctx->csctx;
- #define X(idx, name) csctx->name##_id = glGetUniformLocation(csctx->programs[idx], "u_" #name);
- CS_UNIFORMS
- #undef X
+ ctx->ready_for_rf = 1;
}
diff --git a/ui.c b/ui.c
@@ -211,9 +211,9 @@ do_scale_bar(BeamformerUI *ui, Stream *buf, Variable var, v2 mouse, i32 directio
static void
draw_display_overlay(BeamformerCtx *ctx, Arena a, v2 mouse, Rect display_rect, BeamformFrame *frame)
{
- BeamformerUI *ui = ctx->ui;
- BeamformerParameters *bp = &ctx->params->raw;
- InteractionState *is = &ui->interaction;
+ BeamformerUI *ui = ctx->ui;
+ BeamformerUIParameters *bp = &ui->params;
+ InteractionState *is = &ui->interaction;
Stream buf = arena_stream(&a);
Texture *output = &ctx->fsctx.output.texture;
@@ -531,8 +531,8 @@ do_text_button(BeamformerUI *ui, s8 text, Rect r, v2 mouse, f32 *hover_t)
static void
draw_settings_ui(BeamformerCtx *ctx, Rect r, v2 mouse)
{
- BeamformerUI *ui = ctx->ui;
- BeamformerParameters *bp = &ctx->params->raw;
+ BeamformerUI *ui = ctx->ui;
+ BeamformerUIParameters *bp = &ui->params;
f32 minx = bp->output_min_coordinate.x + 1e-6, maxx = bp->output_max_coordinate.x - 1e-6;
f32 minz = bp->output_min_coordinate.z + 1e-6, maxz = bp->output_max_coordinate.z - 1e-6;
@@ -684,7 +684,7 @@ draw_settings_ui(BeamformerCtx *ctx, Rect r, v2 mouse)
}
static void
-draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r)
+draw_debug_overlay(BeamformerCtx *ctx, BeamformFrame *frame, Arena arena, Rect r)
{
static s8 labels[CS_LAST] = {
#define X(e, n, s, h, pn) [CS_##e] = s8(pn ":"),
@@ -692,8 +692,7 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r)
#undef X
};
- BeamformerUI *ui = ctx->ui;
- ComputeShaderCtx *cs = &ctx->csctx;
+ BeamformerUI *ui = ctx->ui;
uv2 ws = ctx->window_size;
Stream buf = stream_alloc(&arena, 64);
@@ -707,28 +706,23 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r)
draw_text(ui->font, labels[index], pos, 0, colour_from_normalized(FG_COLOUR));
buf.widx = 0;
- stream_append_f64_e(&buf, cs->last_frame_time[index]);
+ stream_append_f64_e(&buf, frame->compute_times[index]);
stream_append_s8(&buf, s8(" [s]"));
v2 txt_fs = measure_text(ui->font, stream_to_s8(&buf));
v2 rpos = {.x = r.pos.x + r.size.w - txt_fs.w, .y = pos.y};
draw_text(ui->font, stream_to_s8(&buf), rpos, 0, colour_from_normalized(FG_COLOUR));
- compute_time_sum += cs->last_frame_time[index];
+ compute_time_sum += frame->compute_times[index];
}
- static s8 totals[2] = {s8("Compute Total:"), s8("Volume Total:")};
- f32 times[2] = {compute_time_sum, ctx->partial_compute_ctx.runtime};
- for (u32 i = 0; i < ARRAY_COUNT(totals); i++) {
- pos.y -= measure_text(ui->font, totals[i]).y;
- draw_text(ui->font, totals[i], pos, 0, colour_from_normalized(FG_COLOUR));
-
- buf.widx = 0;
- stream_append_f64_e(&buf, times[i]);
- stream_append_s8(&buf, s8(" [s]"));
- v2 txt_fs = measure_text(ui->font, stream_to_s8(&buf));
- v2 rpos = {.x = r.pos.x + r.size.w - txt_fs.w, .y = pos.y};
- draw_text(ui->font, stream_to_s8(&buf), rpos, 0, colour_from_normalized(FG_COLOUR));
- }
+ pos.y -= ui->font_height;
+ draw_text(ui->font, s8("Compute Total:"), pos, 0, colour_from_normalized(FG_COLOUR));
+ buf.widx = 0;
+ stream_append_f64_e(&buf, compute_time_sum);
+ stream_append_s8(&buf, s8(" [s]"));
+ v2 txt_fs = measure_text(ui->font, stream_to_s8(&buf));
+ v2 rpos = {.x = r.pos.x + r.size.w - txt_fs.w, .y = pos.y};
+ draw_text(ui->font, stream_to_s8(&buf), rpos, 0, colour_from_normalized(FG_COLOUR));
{
static v2 pos = {.x = 32, .y = 128};
@@ -849,30 +843,6 @@ update_text_input(InputState *is)
}
}
-static b32
-ui_can_start_compute(BeamformerCtx *ctx)
-{
- BeamformFrame *displayed = ctx->beamform_frames + ctx->displayed_frame_index;
- b32 result = ctx->beamform_work_queue.compute_in_flight == 0;
- result &= (displayed->dim.x != 0 || displayed->dim.y != 0);
- result &= displayed->dim.z != 0;
- return result;
-}
-
-static void
-ui_start_compute(BeamformerCtx *ctx)
-{
- /* NOTE: we do not allow ui to start a work if no work was previously completed */
- Arena a = {0};
- if (ui_can_start_compute(ctx)) {
- beamform_work_queue_push(ctx, &a, BW_RECOMPUTE);
- BeamformFrameIterator bfi = beamform_frame_iterator(ctx);
- for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi))
- glClearTexImage(frame->texture, 0, GL_RED, GL_FLOAT, 0);
- }
- ctx->params->upload = 1;
-}
-
static void
ui_gen_mipmaps(BeamformerCtx *ctx)
{
@@ -948,7 +918,7 @@ scale_bar_interaction(BeamformerCtx *ctx, v2 mouse)
*sb->max_value = MIN(max, is->active.f32_limits.y);
sb->zoom_starting_point = (v2){.x = F32_INFINITY, .y = F32_INFINITY};
- ui_start_compute(ctx);
+ ui->flush_params = 1;
}
}
@@ -957,11 +927,12 @@ scale_bar_interaction(BeamformerCtx *ctx, v2 mouse)
if (savepoint) {
*sb->min_value = savepoint->v.x;
*sb->max_value = savepoint->v.y;
- ui_start_compute(ctx);
sb->savepoint_stack = savepoint->next;
savepoint->next = ui->scale_bar_savepoint_freelist;
ui->scale_bar_savepoint_freelist = savepoint;
+
+ ui->flush_params = 1;
}
sb->zoom_starting_point = (v2){.x = F32_INFINITY, .y = F32_INFINITY};
}
@@ -972,7 +943,7 @@ scale_bar_interaction(BeamformerCtx *ctx, v2 mouse)
*sb->max_value += mouse_wheel;
*sb->min_value = MAX(limits.x, *sb->min_value);
*sb->max_value = MIN(limits.y, *sb->max_value);
- ui_start_compute(ctx);
+ ui->flush_params = 1;
}
}
@@ -1044,7 +1015,7 @@ ui_end_interact(BeamformerCtx *ctx, v2 mouse)
}
if (is->active.flags & V_CAUSES_COMPUTE)
- ui_start_compute(ctx);
+ ui->flush_params = 1;
if (is->active.flags & V_GEN_MIPMAPS)
ui_gen_mipmaps(ctx);
@@ -1119,10 +1090,10 @@ ui_init(BeamformerCtx *ctx, Arena store)
ui->small_font_height = measure_text(ui->small_font, s8("8\\W")).h;
/* TODO: multiple views */
- ui->scale_bars[0][SB_LATERAL].min_value = &ctx->params->raw.output_min_coordinate.x;
- ui->scale_bars[0][SB_LATERAL].max_value = &ctx->params->raw.output_max_coordinate.x;
- ui->scale_bars[0][SB_AXIAL].min_value = &ctx->params->raw.output_min_coordinate.z;
- ui->scale_bars[0][SB_AXIAL].max_value = &ctx->params->raw.output_max_coordinate.z;
+ ui->scale_bars[0][SB_LATERAL].min_value = &ui->params.output_min_coordinate.x;
+ ui->scale_bars[0][SB_LATERAL].max_value = &ui->params.output_max_coordinate.x;
+ ui->scale_bars[0][SB_AXIAL].min_value = &ui->params.output_min_coordinate.z;
+ ui->scale_bars[0][SB_AXIAL].max_value = &ui->params.output_max_coordinate.z;
ui->scale_bars[0][SB_LATERAL].scroll_both = 1;
ui->scale_bars[0][SB_AXIAL].scroll_both = 0;
@@ -1142,10 +1113,24 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw
//end_temp_arena(ui->frame_temporary_arena);
//ui->frame_temporary_arena = begin_temp_arena(&ui->arena_for_frame);
+ /* TODO(rnp): there should be a better way of detecting this */
+ if (ui->read_params) {
+ mem_copy(&ctx->params->raw.output_min_coordinate, &ui->params, sizeof(ui->params));
+ ui->flush_params = 0;
+ ui->read_params = 0;
+ }
+
/* NOTE: process interactions first because the user interacted with
* the ui that was presented last frame */
ui_interact(ctx, input);
+ if (ui->flush_params && !ctx->csctx.processing_compute) {
+ mem_copy(&ui->params, &ctx->params->raw.output_min_coordinate, sizeof(ui->params));
+ ui->flush_params = 0;
+ ctx->params->upload = 1;
+ ctx->start_compute = 1;
+ }
+
BeginDrawing();
ClearBackground(colour_from_normalized(BG_COLOUR));
@@ -1157,8 +1142,8 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw
rr.pos.x = lr.pos.x + lr.size.w;
draw_settings_ui(ctx, lr, mouse);
- if (frame_to_draw->dim.w)
+ if (frame_to_draw->ready_to_present)
draw_display_overlay(ctx, ui->arena_for_frame, mouse, rr, frame_to_draw);
- draw_debug_overlay(ctx, ui->arena_for_frame, lr);
+ draw_debug_overlay(ctx, frame_to_draw, ui->arena_for_frame, lr);
EndDrawing();
}
diff --git a/util.h b/util.h
@@ -47,8 +47,9 @@
#define ORONE(x) ((x)? (x) : 1)
#define SIGN(x) ((x) < 0? -1 : 1)
-#define MEGABYTE (1024ULL * 1024ULL)
-#define GIGABYTE (1024ULL * 1024ULL * 1024ULL)
+#define KB(a) ((u64)(a) << 10) /* NOTE: widen first; the result type of << is that of the left operand */
+#define MB(a) ((u64)(a) << 20)
+#define GB(a) ((u64)(a) << 30)
#define U32_MAX (0xFFFFFFFFUL)
#define F32_INFINITY (__builtin_inff())
@@ -65,6 +66,7 @@ typedef uint32_t b32;
typedef float f32;
typedef double f64;
typedef ptrdiff_t size;
+typedef size_t usize;
typedef ptrdiff_t iptr;
typedef size_t uptr;
@@ -156,36 +158,6 @@ typedef struct {
#define INVALID_FILE (-1)
typedef struct {
- size filesize;
- f64 timestamp;
-} FileStats;
-#define ERROR_FILE_STATS (FileStats){.filesize = -1}
-
-#define FILE_WATCH_CALLBACK_FN(name) b32 name(s8 path, iptr user_data, Arena tmp)
-typedef FILE_WATCH_CALLBACK_FN(file_watch_callback);
-
-typedef struct {
- iptr user_data;
- u64 hash;
- file_watch_callback *callback;
-} FileWatch;
-
-typedef struct {
- u64 hash;
- iptr handle;
- s8 name;
- FileWatch file_watches[16];
- u32 file_watch_count;
- Arena buffer;
-} FileWatchDirectory;
-
-typedef struct {
- FileWatchDirectory directory_watches[4];
- iptr handle;
- u32 directory_watch_count;
-} FileWatchContext;
-
-typedef struct {
u8 *data;
u32 widx;
u32 cap;
@@ -229,6 +201,40 @@ typedef struct {
typedef struct Platform Platform;
+typedef struct {
+ Arena arena; /* passed by value to beamformer_complete_compute each time the worker wakes */
+ iptr handle; /* thread handle returned by os_create_thread */
+ iptr window_handle; /* GLFW window whose GL context the worker makes current */
+ iptr sync_handle; /* object the worker sleeps on; signalled through wake_thread */
+ iptr user_context; /* opaque pointer handed to the work function (the BeamformerCtx in setup_beamformer) */
+ b32 asleep; /* set to 1 by the worker before it sleeps, 0 after it wakes */
+} GLWorkerThreadContext;
+
+#define FILE_WATCH_CALLBACK_FN(name) b32 name(Platform *platform, s8 path, iptr user_data, Arena tmp)
+typedef FILE_WATCH_CALLBACK_FN(file_watch_callback);
+
+typedef struct {
+ iptr user_data;
+ u64 hash;
+ file_watch_callback *callback;
+} FileWatch;
+
+typedef struct {
+ u64 hash;
+ iptr handle;
+ s8 name;
+ /* TODO(rnp): just push these as a linked list */
+ FileWatch file_watches[16];
+ u32 file_watch_count;
+ Arena buffer;
+} FileWatchDirectory;
+
+typedef struct {
+ FileWatchDirectory directory_watches[4];
+ iptr handle;
+ u32 directory_watch_count;
+} FileWatchContext;
+
#define PLATFORM_ALLOC_ARENA_FN(name) Arena name(Arena old, size capacity)
typedef PLATFORM_ALLOC_ARENA_FN(platform_alloc_arena_fn);
@@ -236,14 +242,23 @@ typedef PLATFORM_ALLOC_ARENA_FN(platform_alloc_arena_fn);
file_watch_callback *callback, iptr user_data)
typedef PLATFORM_ADD_FILE_WATCH_FN(platform_add_file_watch_fn);
+#define PLATFORM_WAKE_WORKER_FN(name) void name(GLWorkerThreadContext *ctx)
+typedef PLATFORM_WAKE_WORKER_FN(platform_wake_worker_fn);
+
#define PLATFORM_CLOSE_FN(name) void name(iptr file)
typedef PLATFORM_CLOSE_FN(platform_close_fn);
#define PLATFORM_OPEN_FOR_WRITE_FN(name) iptr name(c8 *fname)
typedef PLATFORM_OPEN_FOR_WRITE_FN(platform_open_for_write_fn);
-#define PLATFORM_READ_PIPE_FN(name) size name(iptr pipe, void *buf, size len)
-typedef PLATFORM_READ_PIPE_FN(platform_read_pipe_fn);
+#define PLATFORM_READ_WHOLE_FILE_FN(name) s8 name(Arena *arena, char *file)
+typedef PLATFORM_READ_WHOLE_FILE_FN(platform_read_whole_file_fn);
+
+#define PLATFORM_READ_FILE_FN(name) size name(iptr file, void *buf, size len)
+typedef PLATFORM_READ_FILE_FN(platform_read_file_fn);
+
+#define PLATFORM_WAKE_THREAD_FN(name) void name(iptr sync_handle)
+typedef PLATFORM_WAKE_THREAD_FN(platform_wake_thread_fn);
#define PLATFORM_WRITE_NEW_FILE_FN(name) b32 name(char *fname, s8 raw)
typedef PLATFORM_WRITE_NEW_FILE_FN(platform_write_new_file_fn);
@@ -251,13 +266,18 @@ typedef PLATFORM_WRITE_NEW_FILE_FN(platform_write_new_file_fn);
#define PLATFORM_WRITE_FILE_FN(name) b32 name(iptr file, s8 raw)
typedef PLATFORM_WRITE_FILE_FN(platform_write_file_fn);
-#define PLATFORM_FNS \
- X(add_file_watch) \
- X(alloc_arena) \
- X(close) \
- X(open_for_write) \
- X(read_pipe) \
- X(write_new_file) \
+#define PLATFORM_THREAD_ENTRY_POINT_FN(name) iptr name(iptr _ctx)
+typedef PLATFORM_THREAD_ENTRY_POINT_FN(platform_thread_entry_point_fn);
+
+#define PLATFORM_FNS \
+ X(add_file_watch) \
+ X(alloc_arena) \
+ X(close) \
+ X(open_for_write) \
+ X(read_whole_file) \
+ X(read_file) \
+ X(wake_thread) \
+ X(write_new_file) \
X(write_file)
#define X(name) platform_ ## name ## _fn *name;
@@ -265,18 +285,11 @@ struct Platform {
PLATFORM_FNS
FileWatchContext file_watch_context;
iptr os_context;
+ iptr error_file_handle;
+ GLWorkerThreadContext compute_worker;
};
#undef X
-typedef struct {
- b32 executable_reloaded;
- b32 pipe_data_available;
- iptr pipe_handle;
-
- v2 mouse;
- v2 last_mouse;
-} BeamformerInput;
-
#include "util.c"
#endif /* _UTIL_H_ */