ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

Commit: 1545fc74e9b5d723d8087d76f48d93a8f4bf7f96
Parent: 6cccfe00b1445c336730fd0869ad616bdee2e254
Author: Randy Palamar
Date:   Mon, 24 Feb 2025 16:03:22 -0700

asynchronous compute

Diffstat:
M beamformer.c | 740 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M beamformer.h | 160 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
M beamformer_parameters.h | 48 +++++++++++++++++++++++++++++++++++++-----------
M build.sh | 12 ++++++------
A external/include/raylib_extended.h | 2 ++
A external/rcore_extended.c | 8 ++++++++
M intrinsics.c | 5 +++++
M main_linux.c | 21 +++++++++++++--------
M main_w32.c | 24 ++++++++++++++----------
M os_unix.c | 63 +++++++++++++++++++++++++++++++++++++++++++--------------------
M os_win32.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
M shaders/das.glsl | 17 +++++++----------
M static.c | 206 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M ui.c | 99 ++++++++++++++++++++++++++++++++++---------------------------------------------
M util.h | 113 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
15 files changed, 901 insertions(+), 720 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -23,19 +23,32 @@ make_valid_test_dim(uv3 in) } static BeamformFrameIterator -beamform_frame_iterator(BeamformerCtx *ctx) +beamform_frame_iterator(BeamformerCtx *ctx, i32 start_index, i32 stop_index) { + ASSERT(start_index < ARRAY_COUNT(ctx->beamform_frames)); + ASSERT(stop_index < ARRAY_COUNT(ctx->beamform_frames)); + ASSERT(stop_index >= 0 || start_index >= 0); + + u32 needed_frames; + if (stop_index < 0 || start_index < 0) + needed_frames = ARRAY_COUNT(ctx->beamform_frames); + else + needed_frames = (u32)(stop_index - start_index) % ARRAY_COUNT(ctx->beamform_frames); + + if (start_index < 0) + start_index = stop_index; + BeamformFrameIterator result; result.frames = ctx->beamform_frames; - result.offset = ctx->displayed_frame_index; + result.offset = start_index; result.capacity = ARRAY_COUNT(ctx->beamform_frames); result.cursor = 0; - result.needed_frames = ORONE(ctx->params->raw.output_points.w); + result.needed_frames = needed_frames; return result; } static BeamformFrame * -frame_next(BeamformFrameIterator *bfi) +frame_next_backwards(BeamformFrameIterator *bfi) { BeamformFrame *result = 0; if (bfi->cursor != bfi->needed_frames) { @@ -45,11 +58,20 @@ frame_next(BeamformFrameIterator *bfi) return result; } +static BeamformFrame * +frame_next_forwards(BeamformFrameIterator *bfi) +{ + BeamformFrame *result = 0; + if (bfi->cursor != bfi->needed_frames) { + u32 index = (bfi->offset + bfi->cursor++) % bfi->capacity; + result = bfi->frames + index; + } + return result; +} + static void alloc_beamform_frame(GLParams *gp, BeamformFrame *out, uv3 out_dim, u32 frame_index, s8 name) { - glDeleteTextures(1, &out->texture); - out->dim.x = CLAMP(round_down_power_of_2(ORONE(out_dim.x)), 1, gp->max_3d_texture_dim); out->dim.y = CLAMP(round_down_power_of_2(ORONE(out_dim.y)), 1, gp->max_3d_texture_dim); out->dim.z = CLAMP(round_down_power_of_2(ORONE(out_dim.z)), 1, gp->max_3d_texture_dim); @@ -67,9 +89,13 @@ 
alloc_beamform_frame(GLParams *gp, BeamformFrame *out, uv3 out_dim, u32 frame_in stream_append_u64(&label, frame_index); stream_append_s8(&label, s8("]")); + glDeleteTextures(1, &out->texture); glCreateTextures(GL_TEXTURE_3D, 1, &out->texture); glTextureStorage3D(out->texture, out->mips, GL_RG32F, out->dim.x, out->dim.y, out->dim.z); LABEL_GL_OBJECT(GL_TEXTURE, out->texture, stream_to_s8(&label)); + + glDeleteQueries(ARRAY_COUNT(out->timer_ids), out->timer_ids); + glCreateQueries(GL_TIME_ELAPSED, ARRAY_COUNT(out->timer_ids), out->timer_ids); } static void @@ -82,8 +108,13 @@ alloc_output_image(BeamformerCtx *ctx, uv3 output_dim) uv3 odim = ctx->averaged_frame.dim; UnloadRenderTexture(ctx->fsctx.output); + /* TODO(rnp): sometimes when accepting data on w32 something happens + * and the program will stall in vprintf in TraceLog(...) here. + * for now do this to avoid the problem */ + SetTraceLogLevel(LOG_NONE); /* TODO: select odim.x vs odim.y */ ctx->fsctx.output = LoadRenderTexture(odim.x, odim.z); + SetTraceLogLevel(LOG_INFO); LABEL_GL_OBJECT(GL_FRAMEBUFFER, ctx->fsctx.output.id, s8("Rendered_View")); GenTextureMipmaps(&ctx->fsctx.output.texture); //SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_ANISOTROPIC_8X); @@ -181,110 +212,53 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a) static BeamformWork * beamform_work_queue_pop(BeamformWorkQueue *q) { - BeamformWork *result = q->first; - if (result) { - switch (result->type) { - case BW_FULL_COMPUTE: - case BW_RECOMPUTE: - case BW_PARTIAL_COMPUTE: - /* NOTE: only one compute is allowed per frame */ - if (q->did_compute_this_frame) { - result = 0; - } else { - q->compute_in_flight--; - q->did_compute_this_frame = 1; - ASSERT(q->compute_in_flight >= 0); - } - break; - } - } - /* NOTE: only do this once we have determined if we are doing the work */ - if (result) { - q->first = result->next; - if (result == q->last) { - ASSERT(result->next == 0); - q->last = 0; - } - } + BeamformWork *result = 0; + + 
static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2"); + u64 val = atomic_load(&q->queue); + u64 mask = ARRAY_COUNT(q->work_items) - 1; + u32 widx = val & mask; + u32 ridx = val >> 32 & mask; + + if (ridx != widx) + result = q->work_items + ridx; return result; } -static BeamformWork * -beamform_work_queue_push(BeamformerCtx *ctx, Arena *a, enum beamform_work work_type) +static void +beamform_work_queue_pop_commit(BeamformWorkQueue *q) { - /* TODO: we should have a sub arena specifically for this purpose */ + atomic_add(&q->queue, 0x100000000ULL); +} - BeamformWorkQueue *q = &ctx->beamform_work_queue; - ComputeShaderCtx *cs = &ctx->csctx; +DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push) +{ + BeamformWork *result = 0; - BeamformWork *result = q->next_free; - if (result) q->next_free = result->next; - else result = alloc(a, typeof(*result), 1); - - if (result) { - result->type = work_type; - result->next = 0; - - switch (work_type) { - case BW_FULL_COMPUTE: - if (q->compute_in_flight >= ARRAY_COUNT(cs->raw_data_fences)) { - result->next = q->next_free; - q->next_free = result; - result = 0; - break; - } - cs->raw_data_index++; - if (cs->raw_data_index >= ARRAY_COUNT(cs->raw_data_fences)) - cs->raw_data_index = 0; - /* FALLTHROUGH */ - case BW_RECOMPUTE: { - i32 raw_index = cs->raw_data_index; - result->compute_ctx.raw_data_ssbo_index = raw_index; - /* NOTE: if this times out it means the command queue is more than 3 - * frames behind. 
In that case we need to re-evaluate the buffer size */ - if (cs->raw_data_fences[raw_index]) { - i32 result = glClientWaitSync(cs->raw_data_fences[raw_index], 0, - 10000); - if (result == GL_TIMEOUT_EXPIRED) { - //ASSERT(0); - } - glDeleteSync(cs->raw_data_fences[raw_index]); - cs->raw_data_fences[raw_index] = NULL; - } - ctx->displayed_frame_index++; - if (ctx->displayed_frame_index >= ARRAY_COUNT(ctx->beamform_frames)) - ctx->displayed_frame_index = 0; - result->compute_ctx.frame = ctx->beamform_frames + ctx->displayed_frame_index; - result->compute_ctx.first_pass = 1; - - BeamformFrameIterator bfi = beamform_frame_iterator(ctx); - for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi)) { - uv3 try_dim = ctx->params->raw.output_points.xyz; - if (!uv3_equal(frame->dim, try_dim)) { - u32 index = (bfi.offset - bfi.cursor) % bfi.capacity; - alloc_beamform_frame(&ctx->gl, frame, try_dim, index, - s8("Beamformed_Data")); - } - } - } /* FALLTHROUGH */ - case BW_PARTIAL_COMPUTE: - q->compute_in_flight++; - case BW_SAVE_FRAME: - case BW_SEND_FRAME: - case BW_SSBO_COPY: - break; - } + static_assert(ISPOWEROF2(ARRAY_COUNT(q->work_items)), "queue capacity must be a power of 2"); + u64 val = atomic_load(&q->queue); + u64 mask = ARRAY_COUNT(q->work_items) - 1; + u32 widx = val & mask; + u32 ridx = val >> 32 & mask; + u32 next = (widx + 1) & mask; - if (result) { - if (q->last) q->last = q->last->next = result; - else q->last = q->first = result; - } + if (val & 0x80000000) + atomic_and(&q->queue, ~0x80000000); + + if (next != ridx) { + result = q->work_items + widx; + zero_struct(result); } return result; } +DEBUG_EXPORT BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit) +{ + atomic_add(&q->queue, 1); +} + static void export_frame(BeamformerCtx *ctx, iptr handle, BeamformFrame *frame) { @@ -294,7 +268,7 @@ export_frame(BeamformerCtx *ctx, iptr handle, BeamformFrame *frame) glGetTextureImage(frame->texture, 0, GL_RG, GL_FLOAT, out_size, 
ctx->export_buffer.beg); s8 raw = {.len = out_size, .data = ctx->export_buffer.beg}; if (!ctx->platform.write_file(handle, raw)) - TraceLog(LOG_WARNING, "failed to export frame\n"); + ctx->platform.write_file(ctx->platform.error_file_handle, s8("failed to export frame\n")); ctx->platform.close(handle); } @@ -316,57 +290,58 @@ do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, f32 } } -static void -do_beamform_shader(ComputeShaderCtx *cs, BeamformerParameters *bp, BeamformFrame *frame, - u32 rf_ssbo, iv3 dispatch_dim, iv3 compute_dim_offset, i32 compute_pass) -{ - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, rf_ssbo); - glUniform3iv(cs->volume_export_dim_offset_id, 1, compute_dim_offset.E); - glUniform1i(cs->volume_export_pass_id, compute_pass); - - glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F); - glDispatchCompute(ORONE(dispatch_dim.x / 32), - ORONE(dispatch_dim.y), - ORONE(dispatch_dim.z / 32)); -} +struct compute_cursor { + iv3 cursor; + iv3 dispatch; + iv3 target; +}; -static b32 -do_partial_compute_step(BeamformerCtx *ctx, BeamformFrame *frame) +static struct compute_cursor +start_compute_cursor(uv3 dim, u32 max_points) { - ComputeShaderCtx *cs = &ctx->csctx; - PartialComputeCtx *pc = &ctx->partial_compute_ctx; + struct compute_cursor result = {0}; + u32 invocations_per_dispatch = DAS_LOCAL_SIZE_X * DAS_LOCAL_SIZE_Y * DAS_LOCAL_SIZE_Z; - b32 done = 0; + result.dispatch.y = MIN(max_points / invocations_per_dispatch, MAX(dim.y / DAS_LOCAL_SIZE_Y, 1)); - /* NOTE: we start this elsewhere on the first dispatch so that we can include - * times such as decoding/demodulation/etc. 
*/ - if (!pc->timer_active) { - glQueryCounter(pc->timer_ids[0], GL_TIMESTAMP); - pc->timer_active = 1; - } - - glBeginQuery(GL_TIME_ELAPSED, cs->timer_ids[cs->timer_index][pc->shader]); - cs->timer_active[cs->timer_index][pc->shader] = 1; + u32 remaining = max_points / result.dispatch.y; + result.dispatch.x = MIN(remaining / invocations_per_dispatch, MAX(dim.x / DAS_LOCAL_SIZE_X, 1)); + result.dispatch.z = MIN(remaining / (invocations_per_dispatch * result.dispatch.x), + MAX(dim.z / DAS_LOCAL_SIZE_Z, 1)); - glUseProgram(cs->programs[pc->shader]); + result.target.x = MAX(dim.x / result.dispatch.x / DAS_LOCAL_SIZE_X, 1); + result.target.y = MAX(dim.y / result.dispatch.y / DAS_LOCAL_SIZE_Y, 1); + result.target.z = MAX(dim.z / result.dispatch.z / DAS_LOCAL_SIZE_Z, 1); - /* NOTE: We must tile this otherwise GL will kill us for taking too long */ - /* TODO: this could be based on multiple dimensions */ - i32 dispatch_count = frame->dim.z / 32; - iv3 dim_offset = {.z = !!dispatch_count * 32 * pc->dispatch_index++}; - iv3 dispatch_dim = {.x = frame->dim.x, .y = frame->dim.y, .z = 1}; - do_beamform_shader(cs, &ctx->params->raw, frame, pc->rf_data_ssbo, dispatch_dim, dim_offset, 1); + return result; +} - if (pc->dispatch_index >= dispatch_count) { - pc->dispatch_index = 0; - done = 1; +static iv3 +step_compute_cursor(struct compute_cursor *cursor) +{ + iv3 result = cursor->cursor; + result.x *= cursor->dispatch.x * DAS_LOCAL_SIZE_X; + result.y *= cursor->dispatch.y * DAS_LOCAL_SIZE_Y; + result.z *= cursor->dispatch.z * DAS_LOCAL_SIZE_Z; + + cursor->cursor.x += 1; + if (cursor->cursor.x >= cursor->target.x) { + cursor->cursor.x = 0; + cursor->cursor.y += 1; + if (cursor->cursor.y >= cursor->target.y) { + cursor->cursor.y = 0; + cursor->cursor.z += 1; + } } - glQueryCounter(pc->timer_ids[1], GL_TIMESTAMP); - - glEndQuery(GL_TIME_ELAPSED); + return result; +} - return done; +static b32 +compute_cursor_finished(struct compute_cursor *cursor) +{ + b32 result = cursor->cursor.z > 
cursor->target.z; + return result; } static void @@ -377,9 +352,6 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformFrame *frame, u32 raw uv2 rf_raw_dim = ctx->params->raw.rf_raw_dim; size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16); - glBeginQuery(GL_TIME_ELAPSED, csctx->timer_ids[csctx->timer_index][shader]); - csctx->timer_active[csctx->timer_index][shader] = 1; - glUseProgram(csctx->programs[shader]); u32 output_ssbo_idx = !csctx->last_output_ssbo_index; @@ -430,168 +402,277 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformFrame *frame, u32 raw } } break; case CS_DAS: { - u32 rf_ssbo = csctx->rf_data_ssbos[input_ssbo_idx]; - iv3 dispatch_dim = {.x = frame->dim.x, .y = frame->dim.y, .z = frame->dim.z}; - do_beamform_shader(csctx, &ctx->params->raw, frame, rf_ssbo, dispatch_dim, (iv3){0}, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]); + glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F); + + #if 1 + /* TODO(rnp): compute max_points_per_dispatch based on something like a + * transmit_count * channel_count product */ + u32 max_points_per_dispatch = KB(64); + struct compute_cursor cursor = start_compute_cursor(frame->dim, max_points_per_dispatch); + for (iv3 offset = step_compute_cursor(&cursor); + !compute_cursor_finished(&cursor); + offset = step_compute_cursor(&cursor)) + { + /* IMPORTANT(rnp): prevents OS from coalescing and killing our shader */ + glFinish(); + glUniform3iv(csctx->voxel_offset_id, 1, offset.E); + glDispatchCompute(cursor.dispatch.x, cursor.dispatch.y, cursor.dispatch.z); + } + #else + /* NOTE(rnp): use this for testing tiling code. 
The performance of the above path + * should be the same as this path if everything is working correctly */ + iv3 compute_dim_offset = {0}; + glUniform3iv(csctx->voxel_offset_id, 1, compute_dim_offset.E); + glDispatchCompute(ORONE(frame->dim.x / 32), + ORONE(frame->dim.y), + ORONE(frame->dim.z / 32)); + #endif } break; case CS_SUM: { u32 frame_count = 0; u32 *in_textures = alloc(&arena, u32, MAX_BEAMFORMED_SAVED_FRAMES); - BeamformFrameIterator bfi = beamform_frame_iterator(ctx); - for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi)) + BeamformFrameIterator bfi = beamform_frame_iterator(ctx, ctx->display_frame_index, + ctx->params->raw.output_points.w); + for (BeamformFrame *frame = frame_next_backwards(&bfi); + frame; + frame = frame_next_backwards(&bfi)) + { in_textures[frame_count++] = frame->texture; + } do_sum_shader(csctx, in_textures, frame_count, 1 / (f32)frame_count, ctx->averaged_frame.texture, ctx->averaged_frame.dim); } break; default: ASSERT(0); } - - glEndQuery(GL_TIME_ELAPSED); } -static BeamformFrame * -start_beamform_compute_work(BeamformWork *work, ComputeShaderCtx *cs, BeamformerParametersFull *bpf) +static u32 +compile_shader(Platform *platform, Arena a, u32 type, s8 shader, s8 name) { - BeamformFrame *result = work->compute_ctx.frame; - if (bpf->upload) { - glNamedBufferSubData(cs->shared_ubo, 0, sizeof(bpf->raw), &bpf->raw); - bpf->upload = 0; + u32 sid = glCreateShader(type); + glShaderSource(sid, 1, (const char **)&shader.data, (int *)&shader.len); + glCompileShader(sid); + + i32 res = 0; + glGetShaderiv(sid, GL_COMPILE_STATUS, &res); + + if (res == GL_FALSE) { + Stream buf = arena_stream(&a); + stream_append_s8(&buf, name); + stream_append_s8(&buf, s8(": failed to compile\n")); + + i32 len = 0, out_len = 0; + glGetShaderiv(sid, GL_INFO_LOG_LENGTH, &len); + glGetShaderInfoLog(sid, len, &out_len, (char *)(buf.data + buf.widx)); + buf.widx += out_len; + glDeleteShader(sid); + 
platform->write_file(platform->error_file_handle, stream_to_s8(&buf)); + + sid = 0; } - result->min_coordinate = bpf->raw.output_min_coordinate; - result->max_coordinate = bpf->raw.output_max_coordinate; + return sid; +} +static u32 +link_program(Platform *platform, Arena a, u32 shader_id) +{ + i32 success = 0; + u32 result = glCreateProgram(); + glAttachShader(result, shader_id); + glLinkProgram(result); + glGetProgramiv(result, GL_LINK_STATUS, &success); + if (success == GL_FALSE) { + i32 len = 0; + Stream buf = arena_stream(&a); + stream_append_s8(&buf, s8("shader link error: ")); + glGetProgramInfoLog(result, buf.cap - buf.widx, &len, (c8 *)(buf.data + buf.widx)); + buf.widx = len; + stream_append_byte(&buf, '\n'); + platform->write_file(platform->error_file_handle, stream_to_s8(&buf)); + glDeleteProgram(result); + result = 0; + } return result; } static void -do_beamform_work(BeamformerCtx *ctx, Arena *a) +reload_compute_shader(BeamformerCtx *ctx, s8 path, ComputeShaderReloadContext *csr, Arena tmp) +{ + ComputeShaderCtx *cs = &ctx->csctx; + + /* NOTE: arena works as stack (since everything here is 1 byte aligned) */ + s8 header_in_arena = {.data = tmp.beg}; + if (csr->needs_header) + header_in_arena = push_s8(&tmp, s8(COMPUTE_SHADER_HEADER)); + + s8 shader_text = ctx->platform.read_whole_file(&tmp, (c8 *)path.data); + shader_text.data -= header_in_arena.len; + shader_text.len += header_in_arena.len; + + if (shader_text.data == header_in_arena.data) { + u32 shader_id = compile_shader(&ctx->platform, tmp, GL_COMPUTE_SHADER, shader_text, path); + if (shader_id) { + u32 new_program = link_program(&ctx->platform, tmp, shader_id); + if (new_program) { + Stream buf = arena_stream(&tmp); + stream_append_s8(&buf, s8("loaded: ")); + stream_append_s8(&buf, path); + stream_append_byte(&buf, '\n'); + ctx->platform.write_file(ctx->platform.error_file_handle, + stream_to_s8(&buf)); + glDeleteProgram(cs->programs[csr->shader]); + cs->programs[csr->shader] = new_program; + 
glUseProgram(cs->programs[csr->shader]); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, cs->shared_ubo); + LABEL_GL_OBJECT(GL_PROGRAM, cs->programs[csr->shader], csr->label); + } + } + + glDeleteShader(shader_id); + } else { + Stream buf = arena_stream(&tmp); + stream_append_s8(&buf, s8("failed to load: ")); + stream_append_s8(&buf, path); + stream_append_byte(&buf, '\n'); + ctx->platform.write_file(ctx->platform.error_file_handle, stream_to_s8(&buf)); + /* TODO(rnp): return an error and don't let the work item calling this function + * call pop off the queue; store a retry count and only fail after multiple tries */ + } +} + +DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute) { - BeamformWorkQueue *q = &ctx->beamform_work_queue; + BeamformerCtx *ctx = (BeamformerCtx *)user_context; + BeamformWorkQueue *q = ctx->beamform_work_queue; BeamformWork *work = beamform_work_queue_pop(q); ComputeShaderCtx *cs = &ctx->csctx; + BeamformerParameters *bp = &ctx->params->raw; + + if (ctx->csctx.programs[CS_DAS]) + glProgramUniform1f(ctx->csctx.programs[CS_DAS], ctx->csctx.cycle_t_id, cycle_t); + while (work) { switch (work->type) { - case BW_PARTIAL_COMPUTE: { - BeamformFrame *frame = work->compute_ctx.frame; - - if (work->compute_ctx.first_pass) { - start_beamform_compute_work(work, cs, ctx->params); - - PartialComputeCtx *pc = &ctx->partial_compute_ctx; - pc->runtime = 0; - pc->timer_active = 1; - glQueryCounter(pc->timer_ids[0], GL_TIMESTAMP); - glDeleteBuffers(1, &pc->rf_data_ssbo); - glCreateBuffers(1, &pc->rf_data_ssbo); - glNamedBufferStorage(pc->rf_data_ssbo, decoded_data_size(cs), 0, 0); - LABEL_GL_OBJECT(GL_BUFFER, pc->rf_data_ssbo, s8("Volume_RF_SSBO")); - - /* TODO: maybe we should have some concept of compute shader - * groups, then we could define a group that does the decoding - * and filtering and apply that group directly here. 
For now - * we will do this dumb thing */ - u32 stage_count = ctx->params->compute_stages_count; - enum compute_shaders *stages = ctx->params->compute_stages; - for (u32 i = 0; i < stage_count; i++) { - if (stages[i] == CS_DAS) { - ctx->partial_compute_ctx.shader = stages[i]; - break; + case BW_RELOAD_SHADER: { + ComputeShaderReloadContext *csr = work->reload_shader_ctx; + reload_compute_shader(ctx, csr->path, csr, arena); + + /* TODO(rnp): remove this */ + #define X(idx, name) cs->name##_id = glGetUniformLocation(cs->programs[idx], "u_" #name); + CS_UNIFORMS + #undef X + } break; + case BW_LOAD_RF_DATA: { + u32 raw_index = cs->raw_data_index; + if (cs->raw_data_fences[raw_index]) { + GLsync fence = cs->raw_data_fences[raw_index]; + i32 status = glClientWaitSync(fence, 0, 0); + if (status != GL_ALREADY_SIGNALED) { + ctx->platform.write_file(ctx->platform.error_file_handle, + s8("stall while loading RF data\n")); + u64 timeout = ctx->gl.max_server_wait_time; + for (;;) { + status = glClientWaitSync(fence, 0, timeout); + if (status == GL_CONDITION_SATISFIED || + status == GL_ALREADY_SIGNALED) + { + break; + } } - do_compute_shader(ctx, *a, frame, - work->compute_ctx.raw_data_ssbo_index, - stages[i]); } - u32 output_ssbo = pc->rf_data_ssbo; - u32 input_ssbo = cs->rf_data_ssbos[cs->last_output_ssbo_index]; - size rf_size = decoded_data_size(cs); - glCopyNamedBufferSubData(input_ssbo, output_ssbo, 0, 0, rf_size); + glDeleteSync(cs->raw_data_fences[raw_index]); + cs->raw_data_fences[raw_index] = 0; } - b32 done = do_partial_compute_step(ctx, frame); - if (!done) { - BeamformWork *new; - /* NOTE: this push must not fail */ - new = beamform_work_queue_push(ctx, a, BW_PARTIAL_COMPUTE); - new->compute_ctx.first_pass = 0; - new->compute_ctx.frame = frame; - new->compute_ctx.export_handle = work->compute_ctx.export_handle; - } else if (work->compute_ctx.export_handle != INVALID_FILE) { - export_frame(ctx, work->compute_ctx.export_handle, frame); - 
work->compute_ctx.export_handle = INVALID_FILE; - /* NOTE: do not waste a bunch of GPU space holding onto the volume - * texture if it was just for export */ - glDeleteTextures(1, &frame->texture); - mem_clear(frame, 0, sizeof(*frame)); + if (!uv2_equal(cs->rf_raw_dim, bp->rf_raw_dim) || + !uv4_equal(cs->dec_data_dim, bp->dec_data_dim)) + { + alloc_shader_storage(ctx, arena); } + + uv2 rf_raw_dim = cs->rf_raw_dim; + size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16); + void *rf_data_buf = cs->raw_data_arena.beg + raw_index * rf_raw_size; + + size rlen = ctx->platform.read_file(work->file_handle, rf_data_buf, rf_raw_size); + if (rlen != rf_raw_size) { + stream_append_s8(&ctx->error_stream, s8("Partial Read Occurred: ")); + stream_append_i64(&ctx->error_stream, rlen); + stream_append_byte(&ctx->error_stream, '/'); + stream_append_i64(&ctx->error_stream, rf_raw_size); + stream_append_byte(&ctx->error_stream, '\n'); + ctx->platform.write_file(ctx->platform.error_file_handle, + stream_to_s8(&ctx->error_stream)); + ctx->error_stream.widx = 0; + } else { + switch (ctx->gl.vendor_id) { + case GL_VENDOR_AMD: + case GL_VENDOR_ARM: + case GL_VENDOR_INTEL: + break; + case GL_VENDOR_NVIDIA: + glNamedBufferSubData(cs->raw_data_ssbo, raw_index * rlen, + rlen, rf_data_buf); + } + } + ctx->ready_for_rf = 1; } break; - case BW_FULL_COMPUTE: - case BW_RECOMPUTE: { - BeamformFrame *frame = start_beamform_compute_work(work, cs, ctx->params); + case BW_COMPUTE: { + atomic_store(&cs->processing_compute, 1); + BeamformFrame *frame = work->frame; + if (ctx->params->upload) { + glNamedBufferSubData(cs->shared_ubo, 0, sizeof(ctx->params->raw), + &ctx->params->raw); + ctx->params->upload = 0; + } + + uv3 try_dim = ctx->params->raw.output_points.xyz; + if (!uv3_equal(try_dim, frame->dim)) { + size frame_index = frame - ctx->beamform_frames; + alloc_beamform_frame(&ctx->gl, frame, try_dim, frame_index, + s8("Beamformed_Data")); + } + + frame->in_flight = 1; + frame->min_coordinate = 
ctx->params->raw.output_min_coordinate; + frame->max_coordinate = ctx->params->raw.output_max_coordinate; u32 stage_count = ctx->params->compute_stages_count; enum compute_shaders *stages = ctx->params->compute_stages; - for (u32 i = 0; i < stage_count; i++) - do_compute_shader(ctx, *a, frame, work->compute_ctx.raw_data_ssbo_index, - stages[i]); - - if (work->compute_ctx.export_handle != INVALID_FILE) { - export_frame(ctx, work->compute_ctx.export_handle, frame); - work->compute_ctx.export_handle = INVALID_FILE; + for (u32 i = 0; i < stage_count; i++) { + frame->timer_active[stages[i]] = 1; + glBeginQuery(GL_TIME_ELAPSED, frame->timer_ids[stages[i]]); + do_compute_shader(ctx, arena, frame, cs->raw_data_index, stages[i]); + glEndQuery(GL_TIME_ELAPSED); + } + /* NOTE(rnp): block until work completes so that we can record timings */ + glFinish(); + + for (u32 i = 0; i < ARRAY_COUNT(frame->timer_ids); i++) { + u64 ns = 0; + if (frame->timer_active[i]) { + glGetQueryObjectui64v(frame->timer_ids[i], GL_QUERY_RESULT, &ns); + frame->timer_active[i] = 0; + } + frame->compute_times[i] = (f32)ns / 1e9; } - ctx->fsctx.gen_mipmaps = 1; + frame->ready_to_present = 1; + cs->processing_compute = 0; + } break; + case BW_SAVE_FRAME: { + BeamformFrame *frame = work->output_frame_ctx.frame; + ASSERT(frame->ready_to_present); + export_frame(ctx, work->output_frame_ctx.file_handle, frame); } break; } - - work->next = q->next_free; - q->next_free = work; + beamform_work_queue_pop_commit(q); work = beamform_work_queue_pop(q); } - - if (q->did_compute_this_frame) { - u32 tidx = ctx->csctx.timer_index; - glDeleteSync(ctx->csctx.timer_fences[tidx]); - ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences); - } -} - -static void -check_compute_timers(ComputeShaderCtx *cs, PartialComputeCtx *pc, BeamformerParametersFull *bp) -{ - /* NOTE: volume generation running timer */ - if 
(pc->timer_active) { - u64 start_ns = 0, end_ns = 0; - glGetQueryObjectui64v(pc->timer_ids[0], GL_QUERY_RESULT, &start_ns); - glGetQueryObjectui64v(pc->timer_ids[1], GL_QUERY_RESULT, &end_ns); - u64 elapsed_ns = end_ns - start_ns; - pc->runtime += (f32)elapsed_ns * 1e-9; - pc->timer_active = 0; - } - - /* NOTE: main timers for display portion of the program */ - u32 last_idx = (cs->timer_index - 1) % ARRAY_COUNT(cs->timer_fences); - if (!cs->timer_fences[last_idx]) - return; - - i32 status = glClientWaitSync(cs->timer_fences[last_idx], 0, 0); - if (status == GL_TIMEOUT_EXPIRED || status == GL_WAIT_FAILED) - return; - glDeleteSync(cs->timer_fences[last_idx]); - cs->timer_fences[last_idx] = NULL; - - for (u32 i = 0; i < bp->compute_stages_count; i++) { - u64 ns = 0; - i32 idx = bp->compute_stages[i]; - if (cs->timer_active[last_idx][idx]) { - glGetQueryObjectui64v(cs->timer_ids[last_idx][idx], GL_QUERY_RESULT, &ns); - cs->timer_active[last_idx][idx] = 0; - } - cs->last_frame_time[idx] = (f32)ns / 1e9; - } } #include "ui.c" @@ -602,7 +683,6 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) cycle_t += dt_for_frame; if (cycle_t > 1) cycle_t -= 1; - glProgramUniform1f(ctx->csctx.programs[CS_DAS], ctx->csctx.cycle_t_id, cycle_t); if (IsWindowResized()) { ctx->window_size.h = GetScreenHeight(); @@ -613,83 +693,83 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) ui_init(ctx, ctx->ui_backing_store); } - if (ctx->flags & START_COMPUTE) { - if (ui_can_start_compute(ctx)) - ui_start_compute(ctx); - ctx->flags &= ~START_COMPUTE; + if (ctx->start_compute && !input->pipe_data_available) { + if (ctx->beamform_frames[ctx->display_frame_index].ready_to_present) { + BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); + if (work) { + /* TODO(rnp): cleanup all the duplicates of this */ + work->type = BW_COMPUTE; + work->frame = ctx->beamform_frames + ctx->next_render_frame_index++; + work->frame->ready_to_present = 0; + if 
(ctx->next_render_frame_index >= ARRAY_COUNT(ctx->beamform_frames)) + ctx->next_render_frame_index = 0; + beamform_work_queue_push_commit(ctx->beamform_work_queue); + } + } + ctx->platform.wake_thread(ctx->platform.compute_worker.sync_handle); + ctx->start_compute = 0; } - /* NOTE: Store the compute time for the last frame. */ - check_compute_timers(&ctx->csctx, &ctx->partial_compute_ctx, ctx->params); - BeamformerParameters *bp = &ctx->params->raw; - /* NOTE: Check for and Load RF Data into GPU */ - if (input->pipe_data_available) { - BeamformWork *work = beamform_work_queue_push(ctx, arena, BW_FULL_COMPUTE); - /* NOTE: we can only read in the new data if we get back a work item. - * otherwise we have too many frames in flight and should wait until the - * next frame to try again */ + if (ctx->ready_for_rf && input->pipe_data_available) { + BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); if (work) { - ComputeShaderCtx *cs = &ctx->csctx; - - if (ctx->params->export_next_frame) { - /* TODO: we don't really want the beamformer opening/closing files */ - iptr f = ctx->platform.open_for_write(ctx->params->export_pipe_name); - work->compute_ctx.export_handle = f; - ctx->params->export_next_frame = 0; - } else { - work->compute_ctx.export_handle = INVALID_FILE; - } - - b32 output_3d = bp->output_points.x > 1 && bp->output_points.y > 1 && - bp->output_points.z > 1; - - if (output_3d) { - work->type = BW_PARTIAL_COMPUTE; - BeamformFrame *frame = &ctx->partial_compute_ctx.frame; - uv3 out_dim = ctx->params->raw.output_points.xyz; - alloc_beamform_frame(&ctx->gl, frame, out_dim, 0, s8("Beamformed_Volume")); - work->compute_ctx.frame = frame; + ctx->start_compute = 1; + ctx->ready_for_rf = 0; + + work->type = BW_LOAD_RF_DATA; + work->file_handle = input->pipe_handle; + beamform_work_queue_push_commit(ctx->beamform_work_queue); + + BeamformWork *compute = beamform_work_queue_push(ctx->beamform_work_queue); + if (compute) { + compute->type = BW_COMPUTE; 
+ compute->frame = ctx->beamform_frames + ctx->next_render_frame_index++; + compute->frame->ready_to_present = 0; + if (ctx->next_render_frame_index >= ARRAY_COUNT(ctx->beamform_frames)) + ctx->next_render_frame_index = 0; + beamform_work_queue_push_commit(ctx->beamform_work_queue); + + if (ctx->params->export_next_frame) { + BeamformWork *export = beamform_work_queue_push(ctx->beamform_work_queue); + if (export) { + /* TODO: we don't really want the beamformer opening/closing files */ + iptr f = ctx->platform.open_for_write(ctx->params->export_pipe_name); + export->type = BW_SAVE_FRAME; + export->output_frame_ctx.file_handle = f; + export->output_frame_ctx.frame = compute->frame; + beamform_work_queue_push_commit(ctx->beamform_work_queue); + } + ctx->params->export_next_frame = 0; + } } - if (!uv2_equal(cs->rf_raw_dim, bp->rf_raw_dim) || - !uv4_equal(cs->dec_data_dim, bp->dec_data_dim)) - { - alloc_shader_storage(ctx, *arena); + if (ctx->params->upload) { + /* TODO(rnp): clean this up */ + ctx->ui->read_params = 1; } - u32 raw_index = work->compute_ctx.raw_data_ssbo_index; - uv2 rf_raw_dim = cs->rf_raw_dim; - size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16); - void *rf_data_buf = cs->raw_data_arena.beg + raw_index * rf_raw_size; - alloc_output_image(ctx, bp->output_points.xyz); + } + } - size rlen = ctx->platform.read_pipe(input->pipe_handle, rf_data_buf, rf_raw_size); - if (rlen != rf_raw_size) { - stream_append_s8(&ctx->error_stream, s8("Partial Read Occurred: ")); - stream_append_i64(&ctx->error_stream, rlen); - stream_append_byte(&ctx->error_stream, '/'); - stream_append_i64(&ctx->error_stream, rf_raw_size); - stream_append_s8(&ctx->error_stream, s8("\n\0")); - TraceLog(LOG_WARNING, (c8 *)stream_to_s8(&ctx->error_stream).data); - ctx->error_stream.widx = 0; - } else { - switch (ctx->gl.vendor_id) { - case GL_VENDOR_AMD: - case GL_VENDOR_ARM: - case GL_VENDOR_INTEL: - break; - case GL_VENDOR_NVIDIA: - glNamedBufferSubData(cs->raw_data_ssbo, 
raw_index * rlen, - rlen, rf_data_buf); - } - } + BeamformFrameIterator bfi = beamform_frame_iterator(ctx, ctx->display_frame_index, + ctx->next_render_frame_index); + for (BeamformFrame *frame = frame_next_forwards(&bfi); + frame; + frame = frame_next_forwards(&bfi)) + { + if (frame->in_flight && frame->ready_to_present) { + frame->in_flight = 0; + ctx->display_frame_index = (bfi.offset + bfi.cursor - 1) % bfi.capacity; + ctx->fsctx.gen_mipmaps = 1; } } - ctx->beamform_work_queue.did_compute_this_frame = 0; - do_beamform_work(ctx, arena); + if (ctx->start_compute) { + ctx->start_compute = 0; + ctx->platform.wake_thread(ctx->platform.compute_worker.sync_handle); + } /* NOTE: draw output image texture using render fragment shader */ BeamformFrame *frame_to_draw = 0; @@ -703,8 +783,8 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) frame_to_draw = &ctx->averaged_frame; out_texture = ctx->averaged_frame.texture; } else { - frame_to_draw = ctx->beamform_frames + ctx->displayed_frame_index; - out_texture = frame_to_draw->texture; + frame_to_draw = ctx->beamform_frames + ctx->display_frame_index; + out_texture = frame_to_draw->ready_to_present ? 
frame_to_draw->texture : 0; } glBindTextureUnit(0, out_texture); glUniform1f(fs->db_cutoff_id, fs->db); @@ -725,5 +805,5 @@ DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step) draw_ui(ctx, input, frame_to_draw); if (WindowShouldClose()) - ctx->flags |= SHOULD_EXIT; + ctx->should_exit = 1; } diff --git a/beamformer.h b/beamformer.h @@ -5,7 +5,7 @@ #include <glad.h> #define GRAPHICS_API_OPENGL_43 -#include <raylib.h> +#include <raylib_extended.h> #include <rlgl.h> #include "util.h" @@ -33,11 +33,6 @@ /* TODO: multiple views */ #define MAX_DISPLAYS 1 -enum program_flags { - SHOULD_EXIT = 1 << 0, - START_COMPUTE = 1 << 1, -}; - enum gl_vendor_ids { GL_VENDOR_AMD, GL_VENDOR_ARM, @@ -105,24 +100,13 @@ typedef struct { } ScaleBar; typedef struct { - TempArena frame_temporary_arena; - Arena arena_for_frame; + b32 executable_reloaded; + b32 pipe_data_available; + iptr pipe_handle; - Font font; - Font small_font; - f32 font_height; - f32 small_font_height; - - InteractionState interaction; - InputState text_input_state; - - ScaleBar scale_bars[MAX_DISPLAYS][2]; - v2_sll *scale_bar_savepoint_freelist; - - v2 ruler_start_p; - v2 ruler_stop_p; - u32 ruler_state; -} BeamformerUI; + v2 mouse; + v2 last_mouse; +} BeamformerInput; #define MAX_FRAMES_IN_FLIGHT 3 @@ -157,6 +141,7 @@ typedef struct { } CudaLib; #include "beamformer_parameters.h" + typedef struct { BeamformerParameters raw; enum compute_shaders compute_stages[16]; @@ -166,22 +151,42 @@ typedef struct { c8 export_pipe_name[1024]; } BeamformerParametersFull; -#define CS_UNIFORMS \ - X(CS_DAS, volume_export_dim_offset) \ - X(CS_DAS, volume_export_pass) \ - X(CS_DAS, cycle_t) \ - X(CS_MIN_MAX, mips_level) \ +typedef struct { + TempArena frame_temporary_arena; + Arena arena_for_frame; + + Font font; + Font small_font; + f32 font_height; + f32 small_font_height; + + InteractionState interaction; + InputState text_input_state; + + ScaleBar scale_bars[MAX_DISPLAYS][2]; + v2_sll *scale_bar_savepoint_freelist; + + v2 
ruler_start_p; + v2 ruler_stop_p; + u32 ruler_state; + + BeamformerUIParameters params; + b32 flush_params; + /* TODO(rnp): this is nasty and should be removed */ + b32 read_params; + + iptr last_displayed_frame; +} BeamformerUI; + +#define CS_UNIFORMS \ + X(CS_DAS, voxel_offset) \ + X(CS_DAS, cycle_t) \ + X(CS_MIN_MAX, mips_level) \ X(CS_SUM, sum_prescale) typedef struct { u32 programs[CS_LAST]; - u32 timer_index; - u32 timer_ids[MAX_FRAMES_IN_FLIGHT][CS_LAST]; - b32 timer_active[MAX_FRAMES_IN_FLIGHT][CS_LAST]; - GLsync timer_fences[MAX_FRAMES_IN_FLIGHT]; - f32 last_frame_time[CS_LAST]; - /* NOTE: the raw_data_ssbo is allocated at 3x the required size to allow for tiled * transfers when the GPU is running behind the CPU. It is not mapped on NVIDIA because * their drivers _will_ store the buffer in the system memory. This doesn't happen @@ -201,6 +206,8 @@ typedef struct { u32 shared_ubo; + b32 processing_compute; + uv4 dec_data_dim; uv2 rf_raw_dim; @@ -230,17 +237,13 @@ typedef struct { v4 max_coordinate; u32 mips; -} BeamformFrame; + b32 in_flight; + b32 ready_to_present; -typedef struct { - BeamformFrame frame; - u32 timer_ids[2]; - f32 runtime; - u32 rf_data_ssbo; - u32 shader; - u32 dispatch_index; - b32 timer_active; -} PartialComputeCtx; + u32 timer_ids[CS_LAST]; + f32 compute_times[CS_LAST]; + b32 timer_active[CS_LAST]; +} BeamformFrame; typedef struct { enum gl_vendor_ids vendor_id; @@ -250,51 +253,47 @@ typedef struct { i32 max_3d_texture_dim; i32 max_ssbo_size; i32 max_ubo_size; + i32 max_server_wait_time; } GLParams; enum beamform_work { - BW_FULL_COMPUTE, - BW_RECOMPUTE, - BW_PARTIAL_COMPUTE, + BW_COMPUTE, + BW_LOAD_RF_DATA, + BW_RELOAD_SHADER, BW_SAVE_FRAME, BW_SEND_FRAME, - BW_SSBO_COPY, }; typedef struct { - u32 source_ssbo; - u32 dest_ssbo; -} BeamformSSBOCopy; - -typedef struct { - BeamformFrame *frame; - iptr export_handle; - u32 raw_data_ssbo_index; - b32 first_pass; -} BeamformCompute; + void *beamformer_ctx; + s8 label; + s8 path; + u32 
shader; + b32 needs_header; +} ComputeShaderReloadContext; typedef struct { BeamformFrame *frame; - iptr output_handle; -} BeamformOutputFrame; + iptr file_handle; +} BeamformOutputFrameContext; /* NOTE: discriminated union based on type */ -typedef struct BeamformWork { - struct BeamformWork *next; +typedef struct { union { - BeamformSSBOCopy ssbo_copy_ctx; - BeamformCompute compute_ctx; - BeamformOutputFrame output_frame_ctx; + iptr file_handle; + BeamformFrame *frame; + BeamformOutputFrameContext output_frame_ctx; + ComputeShaderReloadContext *reload_shader_ctx; }; u32 type; } BeamformWork; typedef struct { - BeamformWork *first; - BeamformWork *last; - BeamformWork *next_free; - i32 compute_in_flight; - b32 did_compute_this_frame; + union { + u64 queue; + struct {u32 widx, ridx;}; + }; + BeamformWork work_items[1 << 6]; } BeamformWorkQueue; typedef struct { @@ -309,19 +308,23 @@ typedef struct BeamformerCtx { GLParams gl; uv2 window_size; - u32 flags; + b32 start_compute; + b32 should_exit; + + /* TODO(rnp): is there a better way of tracking this? 
*/ + b32 ready_for_rf; Arena ui_backing_store; BeamformerUI *ui; BeamformFrame beamform_frames[MAX_BEAMFORMED_SAVED_FRAMES]; - u32 displayed_frame_index; + u32 next_render_frame_index; + u32 display_frame_index; /* NOTE: this will only be used when we are averaging */ BeamformFrame averaged_frame; ComputeShaderCtx csctx; FragmentShaderCtx fsctx; - PartialComputeCtx partial_compute_ctx; Arena export_buffer; @@ -329,7 +332,7 @@ typedef struct BeamformerCtx { Platform platform; Stream error_stream; - BeamformWorkQueue beamform_work_queue; + BeamformWorkQueue *beamform_work_queue; BeamformerParametersFull *params; } BeamformerCtx; @@ -340,4 +343,13 @@ typedef struct BeamformerCtx { BeamformerInput *input) typedef BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step_fn); +#define BEAMFORMER_COMPLETE_COMPUTE_FN(name) void name(iptr user_context, Arena arena) +typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn); + +#define BEAMFORM_WORK_QUEUE_PUSH_FN(name) BeamformWork *name(BeamformWorkQueue *q) +typedef BEAMFORM_WORK_QUEUE_PUSH_FN(beamform_work_queue_push_fn); + +#define BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(name) void name(BeamformWorkQueue *q) +typedef BEAMFORM_WORK_QUEUE_PUSH_COMMIT_FN(beamform_work_queue_push_commit_fn); + #endif /*_BEAMFORMER_H_ */ diff --git a/beamformer_parameters.h b/beamformer_parameters.h @@ -29,6 +29,18 @@ enum compute_shaders { #define MAX_BEAMFORMED_SAVED_FRAMES 16 /* NOTE: This struct follows the OpenGL std140 layout. 
DO NOT modify unless you have * read and understood the rules, particulary with regards to _member alignment_ */ + +typedef struct { + v4 output_min_coordinate; /* [m] Back-Top-Left corner of output region (w ignored) */ + v4 output_max_coordinate; /* [m] Front-Bottom-Right corner of output region (w ignored)*/ + f32 sampling_frequency; /* [Hz] */ + f32 center_frequency; /* [Hz] */ + f32 speed_of_sound; /* [m/s] */ + f32 off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */ + i32 beamform_plane; /* Plane to Beamform in 2D HERCULES */ + f32 f_number; /* F# (set to 0 to disable) */ +} BeamformerUIParameters; + typedef struct { u16 channel_mapping[256]; /* Transducer Channel to Verasonics Channel */ u16 uforces_channels[256]; /* Channels used for virtual UFORCES elements */ @@ -37,24 +49,33 @@ typedef struct { f32 xdc_transform[16]; /* IMPORTANT: column major order */ uv4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */ uv4 output_points; /* Width * Height * Depth * (Frame Average Count) */ - v4 output_min_coordinate; /* [m] Back-Top-Left corner of output region (w ignored) */ - v4 output_max_coordinate; /* [m] Front-Bottom-Right corner of output region (w ignored)*/ f32 xdc_element_pitch[2]; /* [m] Transducer Element Pitch {row, col} */ uv2 rf_raw_dim; /* Raw Data Dimensions */ i32 transmit_mode; /* Method/Orientation of Transmit */ u32 decode; /* Decode or just reshape data */ - f32 speed_of_sound; /* [m/s] */ + u32 das_shader_id; + f32 time_offset; /* pulse length correction time [s] */ + + /* TODO(rnp): actually use a substruct but generate a header compatible with MATLAB */ + /* UI Parameters */ + v4 output_min_coordinate; /* [m] Back-Top-Left corner of output region (w ignored) */ + v4 output_max_coordinate; /* [m] Front-Bottom-Right corner of output region (w ignored)*/ f32 sampling_frequency; /* [Hz] */ f32 center_frequency; /* [Hz] */ - f32 time_offset; /* pulse length correction time [s] */ + f32 
speed_of_sound; /* [m/s] */ f32 off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */ i32 beamform_plane; /* Plane to Beamform in 2D HERCULES */ f32 f_number; /* F# (set to 0 to disable) */ - u32 das_shader_id; + u32 readi_group_id; /* Which readi group this data is from */ u32 readi_group_size; /* Size of readi transmit group */ } BeamformerParameters; +_Static_assert((offsetof(BeamformerParameters, output_min_coordinate) & 15) == 0, + "BeamformerParameters.output_min_coordinate must lie on a 16 byte boundary"); +_Static_assert((sizeof(BeamformerParameters) & 15) == 0, + "sizeof(BeamformerParameters) must be a multiple of 16"); + /* NOTE: garbage to get the prepocessor to properly stringize the value of a macro */ #define str_(x) #x #define str(x) str_(x) @@ -70,20 +91,20 @@ layout(std140, binding = 0) uniform parameters {\n\ mat4 xdc_transform; /* IMPORTANT: column major order */\n\ uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */\n\ uvec4 output_points; /* Width * Height * Depth * (Frame Average Count) */\n\ - vec4 output_min_coord; /* [m] Top left corner of output region */\n\ - vec4 output_max_coord; /* [m] Bottom right corner of output region */\n\ vec2 xdc_element_pitch; /* [m] Transducer Element Pitch {row, col} */\n\ uvec2 rf_raw_dim; /* Raw Data Dimensions */\n\ int transmit_mode; /* Method/Orientation of Transmit */\n\ uint decode; /* Decode or just reshape data */\n\ - float speed_of_sound; /* [m/s] */\n\ + uint das_shader_id;\n\ + float time_offset; /* pulse length correction time [s] */\n\ + vec4 output_min_coord; /* [m] Top left corner of output region */\n\ + vec4 output_max_coord; /* [m] Bottom right corner of output region */\n\ float sampling_frequency; /* [Hz] */\n\ float center_frequency; /* [Hz] */\n\ - float time_offset; /* pulse length correction time [s] */\n\ + float speed_of_sound; /* [m/s] */\n\ float off_axis_pos; /* [m] Position on screen normal to beamform in 2D HERCULES */\n\ 
int beamform_plane; /* Plane to Beamform in 2D HERCULES */\n\ float f_number; /* F# (set to 0 to disable) */\n\ - uint das_shader_id;\n\ uint readi_group_id; /* Which readi group this data is from */\n\ uint readi_group_size; /* Size of readi transmit group */\n\ };\n\ @@ -97,4 +118,9 @@ layout(std140, binding = 0) uniform parameters {\n\ #define DAS_ID_RCA_VLS " str(DAS_ID_RCA_VLS) "\n\ #define DAS_ID_RCA_TPW " str(DAS_ID_RCA_TPW) "\n\ \n\ -#line 0\n" +#line 1\n" + +/* TODO(rnp): bake this into the das shader header */ +#define DAS_LOCAL_SIZE_X 32 +#define DAS_LOCAL_SIZE_Y 1 +#define DAS_LOCAL_SIZE_Z 32 diff --git a/build.sh b/build.sh @@ -59,22 +59,22 @@ mkdir -p external/lib build_raylib() { - cp external/raylib/src/raylib.h external/raylib/src/rlgl.h external/include/ + cp external/raylib/src/rlgl.h external/include/ cppflags="${2} -DPLATFORM_DESKTOP_GLFW -DGRAPHICS_API_OPENGL_43" cppflags="${cppflags} -Iexternal/raylib/src -Iexternal/raylib/src/external/glfw/include" case ${1} in shared) ${cc} ${cflags} ${cppflags} -fPIC -shared -DBUILD_LIBTYPE_SHARED \ - external/raylib/src/raudio.c external/raylib/src/rcore.c \ - external/raylib/src/rmodels.c external/raylib/src/rshapes.c \ - external/raylib/src/rtext.c external/raylib/src/rtextures.c \ - external/raylib/src/utils.c \ + external/rcore_extended.c \ + external/raylib/src/raudio.c external/raylib/src/rmodels.c \ + external/raylib/src/rshapes.c external/raylib/src/rtext.c \ + external/raylib/src/rtextures.c external/raylib/src/utils.c \ -o ${raylib} ;; static) + ${cc} ${cflags} ${cppflags} -c external/rcore_extended.c -o external/lib/rcore.c.o ${cc} ${cflags} ${cppflags} -c external/raylib/src/raudio.c -o external/lib/raudio.c.o - ${cc} ${cflags} ${cppflags} -c external/raylib/src/rcore.c -o external/lib/rcore.c.o ${cc} ${cflags} ${cppflags} -c external/raylib/src/rmodels.c -o external/lib/rmodels.c.o ${cc} ${cflags} ${cppflags} -c external/raylib/src/rshapes.c -o external/lib/rshapes.c.o ${cc} ${cflags} 
${cppflags} -c external/raylib/src/rtext.c -o external/lib/rtext.c.o diff --git a/external/include/raylib_extended.h b/external/include/raylib_extended.h @@ -0,0 +1,2 @@ +#include "../raylib/src/raylib.h" +RLAPI void *GetPlatformWindowHandle(void); diff --git a/external/rcore_extended.c b/external/rcore_extended.c @@ -0,0 +1,8 @@ +/* NOTE(rnp): hacky stuff to work around broken raylib garbage */ +#include <raylib_extended.h> +#include "raylib/src/rcore.c" + +void *GetPlatformWindowHandle(void) +{ + return (void *)platform.handle; +} diff --git a/intrinsics.c b/intrinsics.c @@ -4,6 +4,11 @@ #define sqrt_f32(a) __builtin_sqrtf(a) #define atan2_f32(y, x) __builtin_atan2f(y, x) +#define atomic_store(ptr, n) __atomic_store_n(ptr, n, __ATOMIC_RELEASE) +#define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) +#define atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE) +#define atomic_add(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_RELEASE) + static FORCE_INLINE u32 clz_u32(u32 a) { diff --git a/main_linux.c b/main_linux.c @@ -21,8 +21,9 @@ #include "static.c" static void -dispatch_file_watch_events(FileWatchContext *fwctx, Arena arena) +dispatch_file_watch_events(Platform *platform, Arena arena) { + FileWatchContext *fwctx = &platform->file_watch_context; u8 *mem = alloc_(&arena, 4096, 64, 1); Stream path = stream_alloc(&arena, 256); struct inotify_event *event; @@ -46,7 +47,7 @@ dispatch_file_watch_events(FileWatchContext *fwctx, Arena arena) stream_append_s8(&path, file); stream_append_byte(&path, 0); path.widx--; - fw->callback(stream_to_s8(&path), + fw->callback(platform, stream_to_s8(&path), fw->user_data, arena); path.widx = 0; break; @@ -62,10 +63,11 @@ main(void) { BeamformerCtx ctx = {0}; BeamformerInput input = {.executable_reloaded = 1}; - Arena temp_memory = os_alloc_arena((Arena){0}, 16 * MEGABYTE); - ctx.error_stream = stream_alloc(&temp_memory, 1 * MEGABYTE); + Arena temp_memory = os_alloc_arena((Arena){0}, MB(16)); + ctx.error_stream = 
stream_alloc(&temp_memory, MB(1)); - ctx.ui_backing_store = sub_arena(&temp_memory, 2 * MEGABYTE, 4096); + ctx.ui_backing_store = sub_arena(&temp_memory, MB(2), KB(4)); + ctx.platform.compute_worker.arena = sub_arena(&temp_memory, MB(2), KB(4)); Pipe data_pipe = os_open_named_pipe(OS_PIPE_NAME); input.pipe_handle = data_pipe.file; @@ -76,9 +78,12 @@ main(void) #undef X ctx.platform.file_watch_context.handle = inotify_init1(O_NONBLOCK|O_CLOEXEC); + ctx.platform.compute_worker.asleep = 1; + ctx.platform.error_file_handle = STDERR_FILENO; - setup_beamformer(&ctx, &temp_memory); debug_init(&ctx.platform, (iptr)&input, &temp_memory); + setup_beamformer(&ctx, &temp_memory); + os_wake_thread(ctx.platform.compute_worker.sync_handle); struct pollfd fds[2] = {{0}, {0}}; fds[0].fd = ctx.platform.file_watch_context.handle; @@ -86,10 +91,10 @@ main(void) fds[1].fd = data_pipe.file; fds[1].events = POLLIN; - while (!(ctx.flags & SHOULD_EXIT)) { + while (!ctx.should_exit) { poll(fds, 2, 0); if (fds[0].revents & POLLIN) - dispatch_file_watch_events(&ctx.platform.file_watch_context, temp_memory); + dispatch_file_watch_events(&ctx.platform, temp_memory); input.pipe_data_available = !!(fds[1].revents & POLLIN); input.last_mouse = input.mouse; diff --git a/main_w32.c b/main_w32.c @@ -46,7 +46,7 @@ w32_wide_char_to_mb(Stream *s, u16 *wstr, u32 wide_char_length) } static void -dispatch_file_watch(FileWatchDirectory *fw_dir, u8 *buf, Arena arena) +dispatch_file_watch(Platform *platform, FileWatchDirectory *fw_dir, u8 *buf, Arena arena) { i64 offset = 0; Stream path = stream_alloc(&arena, 256); @@ -73,8 +73,7 @@ dispatch_file_watch(FileWatchDirectory *fw_dir, u8 *buf, Arena arena) for (u32 i = 0; i < fw_dir->file_watch_count; i++) { FileWatch *fw = fw_dir->file_watches + i; if (fw->hash == hash) { - fw->callback(stream_to_s8(&path), - fw->user_data, arena); + fw->callback(platform, stream_to_s8(&path), fw->user_data, arena); break; } } @@ -98,7 +97,7 @@ clear_io_queue(Platform *platform, 
BeamformerInput *input, Arena arena) switch (event->tag) { case W32_IO_FILE_WATCH: { FileWatchDirectory *dir = (FileWatchDirectory *)event->context; - dispatch_file_watch(dir, dir->buffer.beg, arena); + dispatch_file_watch(platform, dir, dir->buffer.beg, arena); zero_struct(overlapped); ReadDirectoryChangesW(dir->handle, dir->buffer.beg, 4096, 0, FILE_NOTIFY_CHANGE_LAST_WRITE, 0, overlapped, 0); @@ -143,10 +142,11 @@ main(void) { BeamformerCtx ctx = {0}; BeamformerInput input = {.executable_reloaded = 1}; - Arena temp_memory = os_alloc_arena((Arena){0}, 16 * MEGABYTE); - ctx.error_stream = stream_alloc(&temp_memory, 1 * MEGABYTE); + Arena temp_memory = os_alloc_arena((Arena){0}, MB(16)); + ctx.error_stream = stream_alloc(&temp_memory, MB(1)); - ctx.ui_backing_store = sub_arena(&temp_memory, 2 * MEGABYTE, 4096); + ctx.ui_backing_store = sub_arena(&temp_memory, MB(2), KB(4)); + ctx.platform.compute_worker.arena = sub_arena(&temp_memory, MB(2), KB(4)); Pipe data_pipe = os_open_named_pipe(OS_PIPE_NAME); input.pipe_handle = data_pipe.file; @@ -158,12 +158,16 @@ main(void) w32_context w32_ctx = {0}; w32_ctx.io_completion_handle = CreateIoCompletionPort(INVALID_FILE, 0, 0, 0); - ctx.platform.os_context = (iptr)&w32_ctx; - setup_beamformer(&ctx, &temp_memory); + ctx.platform.os_context = (iptr)&w32_ctx; + ctx.platform.compute_worker.asleep = 1; + ctx.platform.error_file_handle = GetStdHandle(STD_ERROR_HANDLE); + debug_init(&ctx.platform, (iptr)&input, &temp_memory); + setup_beamformer(&ctx, &temp_memory); + os_wake_thread(ctx.platform.compute_worker.sync_handle); - while (!(ctx.flags & SHOULD_EXIT)) { + while (!ctx.should_exit) { clear_io_queue(&ctx.platform, &input, temp_memory); input.last_mouse = input.mouse; diff --git a/os_unix.c b/os_unix.c @@ -8,6 +8,8 @@ #include <dlfcn.h> #include <fcntl.h> #include <poll.h> +#include <pthread.h> +#include <semaphore.h> #include <sys/inotify.h> #include <sys/mman.h> #include <sys/stat.h> @@ -71,20 +73,19 @@ static 
PLATFORM_OPEN_FOR_WRITE_FN(os_open_for_write) return result; } -static s8 -os_read_file(Arena *a, char *file, size filesize) +static PLATFORM_READ_WHOLE_FILE_FN(os_read_whole_file) { s8 result = {0}; + struct stat sb; i32 fd = open(file, O_RDONLY); - if (fd >= 0) { - result = s8alloc(a, filesize); + if (fd >= 0 && fstat(fd, &sb) >= 0) { + result = s8alloc(arena, sb.st_size); size rlen = read(fd, result.data, result.len); - if (rlen != result.len) { + if (rlen != result.len) result = (s8){0}; - } - close(fd); } + if (fd >= 0) close(fd); return result; } @@ -99,19 +100,12 @@ static PLATFORM_WRITE_NEW_FILE_FN(os_write_new_file) return ret; } -static FileStats -os_get_file_stats(char *fname) +static b32 +os_file_exists(char *path) { struct stat st; - - if (stat(fname, &st) < 0) { - return ERROR_FILE_STATS; - } - - return (FileStats){ - .filesize = st.st_size, - .timestamp = (f64)st.st_mtim.tv_sec + (f64)st.st_mtim.tv_nsec * 1e-9, - }; + b32 result = stat(path, &st) == 0; + return result; } static Pipe @@ -121,13 +115,13 @@ os_open_named_pipe(char *name) return (Pipe){.file = open(name, O_RDONLY|O_NONBLOCK), .name = name}; } -static PLATFORM_READ_PIPE_FN(os_read_pipe) +static PLATFORM_READ_FILE_FN(os_read_file) { size r = 0, total_read = 0; do { if (r != -1) total_read += r; - r = read(pipe, buf + total_read, len - total_read); + r = read(file, buf + total_read, len - total_read); } while (r); return total_read; } @@ -255,3 +249,32 @@ static PLATFORM_ADD_FILE_WATCH_FN(os_add_file_watch) insert_file_watch(dir, s8_cut_head(path, dir->name.len + 1), user_data, callback); } + +i32 pthread_setname_np(pthread_t, char *); +static iptr +os_create_thread(iptr user_context, char *name, platform_thread_entry_point_fn *fn) +{ + pthread_t result; + pthread_create(&result, 0, (void *(*)(void *))fn, (void *)user_context); + pthread_setname_np(result, name); + return (iptr)result; +} + +static iptr +os_create_sync_object(Arena *arena) +{ + sem_t *result = push_struct(arena, sem_t); + 
sem_init(result, 0, 0); + return (iptr)result; +} + +static void +os_sleep_thread(iptr sync_handle) +{ + sem_wait((sem_t *)sync_handle); +} + +static PLATFORM_WAKE_THREAD_FN(os_wake_thread) +{ + sem_post((sem_t *)sync_handle); +} diff --git a/os_win32.c b/os_win32.c @@ -34,6 +34,8 @@ #define ERROR_PIPE_NOT_CONNECTED 233L #define ERROR_PIPE_LISTENING 536L +#define THREAD_SET_LIMITED_INFORMATION 0x0400 + typedef struct { u16 wProcessorArchitecture; u16 _pad1; @@ -88,10 +90,13 @@ W32(iptr) CreateFileA(c8 *, u32, u32, void *, u32, u32, void *); W32(iptr) CreateFileMappingA(iptr, void *, u32, u32, u32, c8 *); W32(iptr) CreateIoCompletionPort(iptr, iptr, uptr, u32); W32(iptr) CreateNamedPipeA(c8 *, u32, u32, u32, u32, u32, u32, void *); +W32(iptr) CreateSemaphoreA(iptr, i64, i64, c8 *); +W32(iptr) CreateThread(iptr, usize, iptr, iptr, u32, u32 *); W32(b32) DeleteFileA(c8 *); W32(b32) DisconnectNamedPipe(iptr); W32(void) ExitProcess(i32); W32(b32) FreeLibrary(void *); +W32(i32) GetFileAttributesA(c8 *); W32(b32) GetFileInformationByHandle(iptr, w32_file_info *); W32(i32) GetLastError(void); W32(void *) GetProcAddress(void *, c8 *); @@ -102,12 +107,13 @@ W32(void *) LoadLibraryA(c8 *); W32(void *) MapViewOfFile(iptr, u32, u32, u32, u64); W32(b32) ReadDirectoryChangesW(iptr, u8 *, u32, b32, u32, u32 *, void *, void *); W32(b32) ReadFile(iptr, u8 *, i32, i32 *, void *); +W32(b32) ReleaseSemaphore(iptr, i64, i64 *); +W32(i32) SetThreadDescription(iptr, u16 *); +W32(u32) WaitForSingleObjectEx(iptr, u32, b32); W32(b32) WriteFile(iptr, u8 *, i32, i32 *, void *); W32(void *) VirtualAlloc(u8 *, size, u32, u32); W32(b32) VirtualFree(u8 *, size, u32); -static iptr win32_stderr_handle; - static PLATFORM_WRITE_FILE_FN(os_write_file) { i32 wlen; @@ -115,6 +121,8 @@ static PLATFORM_WRITE_FILE_FN(os_write_file) return raw.len == wlen; } +/* TODO(rnp): cleanup callers of this function they should route through error file handle instead */ +static iptr win32_stderr_handle; static void 
os_write_err_msg(s8 msg) { @@ -165,26 +173,35 @@ static PLATFORM_OPEN_FOR_WRITE_FN(os_open_for_write) return result; } -static s8 -os_read_file(Arena *a, char *file, size filesize) +static PLATFORM_READ_WHOLE_FILE_FN(os_read_whole_file) { s8 result = {0}; - if (filesize > 0 && filesize <= (size)U32_MAX) { - result = s8alloc(a, filesize); - iptr h = CreateFileA(file, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); - if (h >= 0) { - i32 rlen; - if (!ReadFile(h, result.data, result.len, &rlen, 0) || rlen != result.len) { - result = (s8){0}; - } - CloseHandle(h); - } + w32_file_info fileinfo; + iptr h = CreateFileA(file, GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); + if (h >= 0 && GetFileInformationByHandle(h, &fileinfo)) { + size filesize = (size)fileinfo.nFileSizeHigh << 32; + filesize |= (size)fileinfo.nFileSizeLow; + result = s8alloc(arena, filesize); + + ASSERT(filesize <= (size)U32_MAX); + + i32 rlen; + if (!ReadFile(h, result.data, result.len, &rlen, 0) || rlen != result.len) + result = (s8){0}; } + if (h >= 0) CloseHandle(h); return result; } +static PLATFORM_READ_FILE_FN(os_read_file) +{ + i32 total_read = 0; + ReadFile(file, buf, len, &total_read, 0); + return total_read; +} + static PLATFORM_WRITE_NEW_FILE_FN(os_write_new_file) { if (raw.len > (size)U32_MAX) { @@ -202,42 +219,21 @@ static PLATFORM_WRITE_NEW_FILE_FN(os_write_new_file) return ret; } -static FileStats -os_get_file_stats(char *fname) +static b32 +os_file_exists(char *path) { - iptr h = CreateFileA(fname, 0, 0, 0, OPEN_EXISTING, 0, 0); - if (h == INVALID_FILE) - return ERROR_FILE_STATS; - - w32_file_info fileinfo; - if (!GetFileInformationByHandle(h, &fileinfo)) { - os_write_err_msg(s8("os_get_file_stats: couldn't get file info\n")); - CloseHandle(h); - return ERROR_FILE_STATS; - } - CloseHandle(h); - - size filesize = (size)fileinfo.nFileSizeHigh << 32; - filesize |= (size)fileinfo.nFileSizeLow; - - return (FileStats){.filesize = filesize, .timestamp = fileinfo.ftLastWriteTime}; + b32 result = 
GetFileAttributesA(path) != -1; + return result; } static Pipe os_open_named_pipe(char *name) { iptr h = CreateNamedPipeA(name, PIPE_ACCESS_INBOUND, PIPE_TYPE_BYTE|PIPE_NOWAIT, 1, - 0, 1 * MEGABYTE, 0, 0); + 0, MB(1), 0, 0); return (Pipe){.file = h, .name = name}; } -static PLATFORM_READ_PIPE_FN(os_read_pipe) -{ - i32 total_read = 0; - ReadFile(pipe, buf, len, &total_read, 0); - return total_read; -} - static void * os_open_shared_memory_area(char *name, size cap) { @@ -334,3 +330,30 @@ static PLATFORM_ADD_FILE_WATCH_FN(os_add_file_watch) insert_file_watch(dir, s8_cut_head(path, dir->name.len + 1), user_data, callback); } + +static iptr +os_create_thread(iptr user_context, char *name, platform_thread_entry_point_fn *fn) +{ + iptr result = CreateThread(0, 0, (iptr)fn, user_context, 0, 0); + /* TODO(rnp): name needs to be utf16 encoded */ + //SetThreadDescription(result, s8_to_16(arena, name).data); + return result; +} + +static iptr +os_create_sync_object(Arena *arena) +{ + iptr result = CreateSemaphoreA(0, 0, 1, 0); + return result; +} + +static void +os_sleep_thread(iptr sync_handle) +{ + WaitForSingleObjectEx(sync_handle, 0xFFFFFFFF, 0); +} + +static PLATFORM_WAKE_THREAD_FN(os_wake_thread) +{ + ReleaseSemaphore(sync_handle, 1, 0); +} diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -7,9 +7,8 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 { layout(rg32f, binding = 0) writeonly uniform image3D u_out_data_tex; -layout(location = 2) uniform int u_volume_export_pass; -layout(location = 3) uniform ivec3 u_volume_export_dim_offset; -layout(location = 4) uniform float u_cycle_t; +layout(location = 2) uniform ivec3 u_voxel_offset; +layout(location = 3) uniform float u_cycle_t; #define C_SPLINE 0.5 @@ -66,8 +65,7 @@ vec3 calc_image_point(vec3 voxel) case DAS_ID_HERCULES: case DAS_ID_RCA_TPW: case DAS_ID_RCA_VLS: - if (u_volume_export_pass == 0) - image_point.y = off_axis_pos; + image_point.y = off_axis_pos; break; } @@ -112,7 +110,7 @@ float 
cylindricalwave_transmit_distance(vec3 point, float focal_depth, float tra vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg) { uint ridx = 0; - int direction = beamform_plane * (u_volume_export_pass ^ 1); + int direction = beamform_plane; if (direction != TX_ROWS) image_point = image_point.yxz; bool tx_col = TX_MODE_TX_COLS(transmit_mode); @@ -158,7 +156,7 @@ vec2 RCA(vec3 image_point, vec3 delta, float apodization_arg) vec2 HERCULES(vec3 image_point, vec3 delta, float apodization_arg) { uint ridx = 0; - int direction = beamform_plane * (u_volume_export_pass ^ 1); + int direction = beamform_plane; if (direction != TX_ROWS) image_point = image_point.yxz; bool tx_col = TX_MODE_TX_COLS(transmit_mode); @@ -237,9 +235,8 @@ vec2 uFORCES(vec3 image_point, vec3 delta, float apodization_arg) void main() { /* NOTE: Convert voxel to physical coordinates */ - ivec3 out_coord = ivec3(gl_GlobalInvocationID) + u_volume_export_dim_offset; - vec3 image_point = calc_image_point(vec3(gl_GlobalInvocationID) - + vec3(u_volume_export_dim_offset)); + ivec3 out_coord = ivec3(gl_GlobalInvocationID) + u_voxel_offset; + vec3 image_point = calc_image_point(vec3(out_coord)); /* NOTE: used for constant F# dynamic receive apodization. This is implemented as: * diff --git a/static.c b/static.c @@ -7,16 +7,32 @@ #else static void *debug_lib; -static beamformer_frame_step_fn *beamformer_frame_step; +#define DEBUG_ENTRY_POINTS \ + X(beamformer_frame_step) \ + X(beamformer_complete_compute) \ + X(beamform_work_queue_push) \ + X(beamform_work_queue_push_commit) + +#define X(name) static name ##_fn *name; +DEBUG_ENTRY_POINTS +#undef X static FILE_WATCH_CALLBACK_FN(debug_reload) { BeamformerInput *input = (BeamformerInput *)user_data; Stream err = arena_stream(&tmp); + /* NOTE(rnp): spin until compute thread finishes its work (we will probably + * never reload while compute is in progress but just incase). 
*/ + while (!atomic_load(&platform->compute_worker.asleep)); + os_unload_library(debug_lib); debug_lib = os_load_library(OS_DEBUG_LIB_NAME, OS_DEBUG_LIB_TEMP_NAME, &err); - beamformer_frame_step = os_lookup_dynamic_symbol(debug_lib, "beamformer_frame_step", &err); + + #define X(name) name = os_lookup_dynamic_symbol(debug_lib, #name, &err); + DEBUG_ENTRY_POINTS + #undef X + os_write_err_msg(s8("Reloaded Main Executable\n")); input->executable_reloaded = 1; @@ -27,7 +43,7 @@ static void debug_init(Platform *p, iptr input, Arena *arena) { p->add_file_watch(p, arena, s8(OS_DEBUG_LIB_NAME), debug_reload, input); - debug_reload((s8){0}, input, *arena); + debug_reload(p, (s8){0}, input, *arena); } #endif /* _DEBUG */ @@ -85,6 +101,7 @@ get_gl_params(GLParams *gl, Stream *err) glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &gl->max_3d_texture_dim); glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &gl->max_ssbo_size); glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &gl->max_ubo_size); + glGetIntegerv(GL_MAX_SERVER_WAIT_TIMEOUT, &gl->max_server_wait_time); } static void @@ -132,49 +149,20 @@ dump_gl_params(GLParams *gl, Arena a) stream_append_i64(&s, gl->version_minor); stream_append_s8(&s, s8("\nMax 1D/2D Texture Dimension: ")); stream_append_i64(&s, gl->max_2d_texture_dim); - stream_append_s8(&s, s8("\nMax 3D Texture Dimension: ")); + stream_append_s8(&s, s8("\nMax 3D Texture Dimension: ")); stream_append_i64(&s, gl->max_3d_texture_dim); - stream_append_s8(&s, s8("\nMax SSBO Size: ")); + stream_append_s8(&s, s8("\nMax SSBO Size: ")); stream_append_i64(&s, gl->max_ssbo_size); - stream_append_s8(&s, s8("\nMax UBO Size: ")); + stream_append_s8(&s, s8("\nMax UBO Size: ")); stream_append_i64(&s, gl->max_ubo_size); + stream_append_s8(&s, s8("\nMax Server Wait Time [ns]: ")); + stream_append_i64(&s, gl->max_server_wait_time); stream_append_s8(&s, s8("\n-----------------------\n")); if (!s.errors) os_write_err_msg(stream_to_s8(&s)); #endif } -static u32 -compile_shader(Arena a, u32 type, s8 
shader) -{ - u32 sid = glCreateShader(type); - glShaderSource(sid, 1, (const char **)&shader.data, (int *)&shader.len); - glCompileShader(sid); - - i32 res = 0; - glGetShaderiv(sid, GL_COMPILE_STATUS, &res); - - if (res == GL_FALSE) { - char *stype; - switch (type) { - case GL_COMPUTE_SHADER: stype = "Compute"; break; - case GL_FRAGMENT_SHADER: stype = "Fragment"; break; - } - - TraceLog(LOG_WARNING, "SHADER: [ID %u] %s shader failed to compile", sid, stype); - i32 len = 0; - glGetShaderiv(sid, GL_INFO_LOG_LENGTH, &len); - s8 err = s8alloc(&a, len); - glGetShaderInfoLog(sid, len, (int *)&err.len, (char *)err.data); - TraceLog(LOG_WARNING, "SHADER: [ID %u] Compile error: %s", sid, (char *)err.data); - glDeleteShader(sid); - - sid = 0; - } - - return sid; -} - static FILE_WATCH_CALLBACK_FN(reload_render_shader) { FragmentShaderCtx *ctx = (FragmentShaderCtx *)user_data; @@ -192,61 +180,40 @@ static FILE_WATCH_CALLBACK_FN(reload_render_shader) return 1; } -struct compute_shader_reload_ctx { - BeamformerCtx *ctx; - s8 label; - u32 shader; - b32 needs_header; -}; -static FILE_WATCH_CALLBACK_FN(reload_compute_shader) +static FILE_WATCH_CALLBACK_FN(queue_compute_shader_reload) { - struct compute_shader_reload_ctx *ctx = (struct compute_shader_reload_ctx *)user_data; - ComputeShaderCtx *cs = &ctx->ctx->csctx; - - b32 result = 1; - - /* NOTE: arena works as stack (since everything here is 1 byte aligned) */ - s8 header_in_arena = {.data = tmp.beg}; - if (ctx->needs_header) - header_in_arena = push_s8(&tmp, s8(COMPUTE_SHADER_HEADER)); - - size fs = os_get_file_stats((c8 *)path.data).filesize; - - s8 shader_text = os_read_file(&tmp, (c8 *)path.data, fs); - shader_text.data -= header_in_arena.len; - shader_text.len += header_in_arena.len; - - if (shader_text.data == header_in_arena.data) { - u32 shader_id = compile_shader(tmp, GL_COMPUTE_SHADER, shader_text); - if (shader_id) { - glDeleteProgram(cs->programs[ctx->shader]); - cs->programs[ctx->shader] = 
rlLoadComputeShaderProgram(shader_id); - glUseProgram(cs->programs[ctx->shader]); - glBindBufferBase(GL_UNIFORM_BUFFER, 0, cs->shared_ubo); - LABEL_GL_OBJECT(GL_PROGRAM, cs->programs[ctx->shader], ctx->label); - - TraceLog(LOG_INFO, "%s loaded", path.data); - - ctx->ctx->flags |= START_COMPUTE; - } else { - result = 0; + ComputeShaderReloadContext *csr = (typeof(csr))user_data; + BeamformerCtx *ctx = csr->beamformer_ctx; + BeamformWork *work = beamform_work_queue_push(ctx->beamform_work_queue); + if (work) { + work->type = BW_RELOAD_SHADER; + work->reload_shader_ctx = csr; + beamform_work_queue_push_commit(ctx->beamform_work_queue); + if (ctx->platform.compute_worker.asleep && + ctx->beamform_frames[ctx->display_frame_index].ready_to_present) + { + BeamformWork *compute = beamform_work_queue_push(ctx->beamform_work_queue); + if (compute) { + compute->type = BW_COMPUTE; + compute->frame = ctx->beamform_frames + ctx->next_render_frame_index++; + compute->frame->ready_to_present = 0; + if (ctx->next_render_frame_index >= ARRAY_COUNT(ctx->beamform_frames)) + ctx->next_render_frame_index = 0; + beamform_work_queue_push_commit(ctx->beamform_work_queue); + } } - glDeleteShader(shader_id); - } else { - TraceLog(LOG_INFO, "shader failed to load: %s", path.data); + ctx->platform.wake_thread(ctx->platform.compute_worker.sync_handle); } - - return result; + return 1; } static FILE_WATCH_CALLBACK_FN(load_cuda_lib) { CudaLib *cl = (CudaLib *)user_data; - b32 result = 0; - size fs = os_get_file_stats((c8 *)path.data).filesize; - if (fs > 0) { - TraceLog(LOG_INFO, "Loading CUDA lib: %s", OS_CUDA_LIB_NAME); + b32 result = os_file_exists((c8 *)path.data); + if (result) { + os_write_err_msg(s8("loading CUDA lib: " OS_CUDA_LIB_NAME "\n")); Stream err = arena_stream(&tmp); os_unload_library(cl->lib); @@ -254,8 +221,6 @@ static FILE_WATCH_CALLBACK_FN(load_cuda_lib) #define X(name) cl->name = os_lookup_dynamic_symbol(cl->lib, #name, &err); CUDA_LIB_FNS #undef X - - result = 1; } #define 
X(name) if (!cl->name) cl->name = name ## _stub; @@ -265,6 +230,31 @@ static FILE_WATCH_CALLBACK_FN(load_cuda_lib) return result; } + +#define GLFW_VISIBLE 0x00020004 +void glfwWindowHint(i32, i32); +iptr glfwCreateWindow(i32, i32, char *, iptr, iptr); +void glfwMakeContextCurrent(iptr); + +#include <stdio.h> +static PLATFORM_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point) +{ + GLWorkerThreadContext *ctx = (GLWorkerThreadContext *)_ctx; + + glfwMakeContextCurrent(ctx->window_handle); + + for (;;) { + ctx->asleep = 1; + os_sleep_thread(ctx->sync_handle); + ctx->asleep = 0; + beamformer_complete_compute(ctx->user_context, ctx->arena); + } + + unreachable(); + + return 0; +} + static void setup_beamformer(BeamformerCtx *ctx, Arena *memory) { @@ -281,6 +271,20 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory) dump_gl_params(&ctx->gl, *memory); validate_gl_requirements(&ctx->gl); + glfwWindowHint(GLFW_VISIBLE, 0); + iptr raylib_window_handle = (iptr)GetPlatformWindowHandle(); + GLWorkerThreadContext *worker = &ctx->platform.compute_worker; + worker->window_handle = glfwCreateWindow(320, 240, "", 0, raylib_window_handle); + worker->sync_handle = os_create_sync_object(memory); + worker->handle = os_create_thread((iptr)worker, "[compute]", + compute_worker_thread_entry_point); + /* TODO(rnp): we should lock this down after we have something working */ + worker->user_context = (iptr)ctx; + + glfwMakeContextCurrent(raylib_window_handle); + + ctx->beamform_work_queue = push_struct(memory, BeamformWorkQueue); + ctx->fsctx.db = -50.0f; ctx->fsctx.threshold = 40.0f; @@ -295,7 +299,7 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory) ctx->params->compute_stages_count = 3; if (ctx->gl.vendor_id == GL_VENDOR_NVIDIA - && load_cuda_lib(s8(OS_CUDA_LIB_NAME), (iptr)&ctx->cuda_lib, *memory)) + && load_cuda_lib(&ctx->platform, s8(OS_CUDA_LIB_NAME), (iptr)&ctx->cuda_lib, *memory)) { os_add_file_watch(&ctx->platform, memory, s8(OS_CUDA_LIB_NAME), load_cuda_lib, 
(iptr)&ctx->cuda_lib); @@ -316,30 +320,24 @@ setup_beamformer(BeamformerCtx *ctx, Arena *memory) glNamedBufferStorage(ctx->csctx.shared_ubo, sizeof(BeamformerParameters), 0, GL_DYNAMIC_STORAGE_BIT); LABEL_GL_OBJECT(GL_BUFFER, ctx->csctx.shared_ubo, s8("Beamformer_Parameters")); - glGenQueries(ARRAY_COUNT(ctx->csctx.timer_fences) * CS_LAST, (u32 *)ctx->csctx.timer_ids); - glGenQueries(ARRAY_COUNT(ctx->partial_compute_ctx.timer_ids), ctx->partial_compute_ctx.timer_ids); - - #define X(e, sn, f, nh, pretty_name) do if (s8(f).len > 0) { \ - struct compute_shader_reload_ctx *csr = push_struct(memory, typeof(*csr)); \ - csr->ctx = ctx; \ - csr->label = s8("CS_" #e); \ - csr->shader = sn; \ - csr->needs_header = nh; \ - s8 shader = s8(static_path_join("shaders", f ".glsl")); \ - reload_compute_shader(shader, (iptr)csr, *memory); \ - os_add_file_watch(&ctx->platform, memory, shader, reload_compute_shader, (iptr)csr); \ + #define X(e, sn, f, nh, pretty_name) do if (s8(f).len > 0) { \ + ComputeShaderReloadContext *csr = push_struct(memory, typeof(*csr)); \ + csr->beamformer_ctx = ctx; \ + csr->label = s8("CS_" #e); \ + csr->shader = sn; \ + csr->needs_header = nh; \ + csr->path = s8(static_path_join("shaders", f ".glsl")); \ + os_add_file_watch(&ctx->platform, memory, csr->path, queue_compute_shader_reload, (iptr)csr); \ + queue_compute_shader_reload(&ctx->platform, csr->path, (iptr)csr, *memory); \ } while (0); COMPUTE_SHADERS #undef X + os_wake_thread(worker->sync_handle); s8 render = s8(static_path_join("shaders", "render.glsl")); - reload_render_shader(render, (iptr)&ctx->fsctx, *memory); + reload_render_shader(&ctx->platform, render, (iptr)&ctx->fsctx, *memory); os_add_file_watch(&ctx->platform, memory, render, reload_render_shader, (iptr)&ctx->fsctx); ctx->fsctx.gen_mipmaps = 0; - /* TODO(rnp): remove this */ - ComputeShaderCtx *csctx = &ctx->csctx; - #define X(idx, name) csctx->name##_id = glGetUniformLocation(csctx->programs[idx], "u_" #name); - CS_UNIFORMS - #undef X 
+ ctx->ready_for_rf = 1; } diff --git a/ui.c b/ui.c @@ -211,9 +211,9 @@ do_scale_bar(BeamformerUI *ui, Stream *buf, Variable var, v2 mouse, i32 directio static void draw_display_overlay(BeamformerCtx *ctx, Arena a, v2 mouse, Rect display_rect, BeamformFrame *frame) { - BeamformerUI *ui = ctx->ui; - BeamformerParameters *bp = &ctx->params->raw; - InteractionState *is = &ui->interaction; + BeamformerUI *ui = ctx->ui; + BeamformerUIParameters *bp = &ui->params; + InteractionState *is = &ui->interaction; Stream buf = arena_stream(&a); Texture *output = &ctx->fsctx.output.texture; @@ -531,8 +531,8 @@ do_text_button(BeamformerUI *ui, s8 text, Rect r, v2 mouse, f32 *hover_t) static void draw_settings_ui(BeamformerCtx *ctx, Rect r, v2 mouse) { - BeamformerUI *ui = ctx->ui; - BeamformerParameters *bp = &ctx->params->raw; + BeamformerUI *ui = ctx->ui; + BeamformerUIParameters *bp = &ui->params; f32 minx = bp->output_min_coordinate.x + 1e-6, maxx = bp->output_max_coordinate.x - 1e-6; f32 minz = bp->output_min_coordinate.z + 1e-6, maxz = bp->output_max_coordinate.z - 1e-6; @@ -684,7 +684,7 @@ draw_settings_ui(BeamformerCtx *ctx, Rect r, v2 mouse) } static void -draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r) +draw_debug_overlay(BeamformerCtx *ctx, BeamformFrame *frame, Arena arena, Rect r) { static s8 labels[CS_LAST] = { #define X(e, n, s, h, pn) [CS_##e] = s8(pn ":"), @@ -692,8 +692,7 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r) #undef X }; - BeamformerUI *ui = ctx->ui; - ComputeShaderCtx *cs = &ctx->csctx; + BeamformerUI *ui = ctx->ui; uv2 ws = ctx->window_size; Stream buf = stream_alloc(&arena, 64); @@ -707,28 +706,23 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r) draw_text(ui->font, labels[index], pos, 0, colour_from_normalized(FG_COLOUR)); buf.widx = 0; - stream_append_f64_e(&buf, cs->last_frame_time[index]); + stream_append_f64_e(&buf, frame->compute_times[index]); stream_append_s8(&buf, s8(" [s]")); v2 txt_fs = 
measure_text(ui->font, stream_to_s8(&buf)); v2 rpos = {.x = r.pos.x + r.size.w - txt_fs.w, .y = pos.y}; draw_text(ui->font, stream_to_s8(&buf), rpos, 0, colour_from_normalized(FG_COLOUR)); - compute_time_sum += cs->last_frame_time[index]; + compute_time_sum += frame->compute_times[index]; } - static s8 totals[2] = {s8("Compute Total:"), s8("Volume Total:")}; - f32 times[2] = {compute_time_sum, ctx->partial_compute_ctx.runtime}; - for (u32 i = 0; i < ARRAY_COUNT(totals); i++) { - pos.y -= measure_text(ui->font, totals[i]).y; - draw_text(ui->font, totals[i], pos, 0, colour_from_normalized(FG_COLOUR)); - - buf.widx = 0; - stream_append_f64_e(&buf, times[i]); - stream_append_s8(&buf, s8(" [s]")); - v2 txt_fs = measure_text(ui->font, stream_to_s8(&buf)); - v2 rpos = {.x = r.pos.x + r.size.w - txt_fs.w, .y = pos.y}; - draw_text(ui->font, stream_to_s8(&buf), rpos, 0, colour_from_normalized(FG_COLOUR)); - } + pos.y -= ui->font_height; + draw_text(ui->font, s8("Compute Total:"), pos, 0, colour_from_normalized(FG_COLOUR)); + buf.widx = 0; + stream_append_f64_e(&buf, compute_time_sum); + stream_append_s8(&buf, s8(" [s]")); + v2 txt_fs = measure_text(ui->font, stream_to_s8(&buf)); + v2 rpos = {.x = r.pos.x + r.size.w - txt_fs.w, .y = pos.y}; + draw_text(ui->font, stream_to_s8(&buf), rpos, 0, colour_from_normalized(FG_COLOUR)); { static v2 pos = {.x = 32, .y = 128}; @@ -849,30 +843,6 @@ update_text_input(InputState *is) } } -static b32 -ui_can_start_compute(BeamformerCtx *ctx) -{ - BeamformFrame *displayed = ctx->beamform_frames + ctx->displayed_frame_index; - b32 result = ctx->beamform_work_queue.compute_in_flight == 0; - result &= (displayed->dim.x != 0 || displayed->dim.y != 0); - result &= displayed->dim.z != 0; - return result; -} - -static void -ui_start_compute(BeamformerCtx *ctx) -{ - /* NOTE: we do not allow ui to start a work if no work was previously completed */ - Arena a = {0}; - if (ui_can_start_compute(ctx)) { - beamform_work_queue_push(ctx, &a, BW_RECOMPUTE); - 
BeamformFrameIterator bfi = beamform_frame_iterator(ctx); - for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi)) - glClearTexImage(frame->texture, 0, GL_RED, GL_FLOAT, 0); - } - ctx->params->upload = 1; -} - static void ui_gen_mipmaps(BeamformerCtx *ctx) { @@ -948,7 +918,7 @@ scale_bar_interaction(BeamformerCtx *ctx, v2 mouse) *sb->max_value = MIN(max, is->active.f32_limits.y); sb->zoom_starting_point = (v2){.x = F32_INFINITY, .y = F32_INFINITY}; - ui_start_compute(ctx); + ui->flush_params = 1; } } @@ -957,11 +927,12 @@ scale_bar_interaction(BeamformerCtx *ctx, v2 mouse) if (savepoint) { *sb->min_value = savepoint->v.x; *sb->max_value = savepoint->v.y; - ui_start_compute(ctx); sb->savepoint_stack = savepoint->next; savepoint->next = ui->scale_bar_savepoint_freelist; ui->scale_bar_savepoint_freelist = savepoint; + + ui->flush_params = 1; } sb->zoom_starting_point = (v2){.x = F32_INFINITY, .y = F32_INFINITY}; } @@ -972,7 +943,7 @@ scale_bar_interaction(BeamformerCtx *ctx, v2 mouse) *sb->max_value += mouse_wheel; *sb->min_value = MAX(limits.x, *sb->min_value); *sb->max_value = MIN(limits.y, *sb->max_value); - ui_start_compute(ctx); + ui->flush_params = 1; } } @@ -1044,7 +1015,7 @@ ui_end_interact(BeamformerCtx *ctx, v2 mouse) } if (is->active.flags & V_CAUSES_COMPUTE) - ui_start_compute(ctx); + ui->flush_params = 1; if (is->active.flags & V_GEN_MIPMAPS) ui_gen_mipmaps(ctx); @@ -1119,10 +1090,10 @@ ui_init(BeamformerCtx *ctx, Arena store) ui->small_font_height = measure_text(ui->small_font, s8("8\\W")).h; /* TODO: multiple views */ - ui->scale_bars[0][SB_LATERAL].min_value = &ctx->params->raw.output_min_coordinate.x; - ui->scale_bars[0][SB_LATERAL].max_value = &ctx->params->raw.output_max_coordinate.x; - ui->scale_bars[0][SB_AXIAL].min_value = &ctx->params->raw.output_min_coordinate.z; - ui->scale_bars[0][SB_AXIAL].max_value = &ctx->params->raw.output_max_coordinate.z; + ui->scale_bars[0][SB_LATERAL].min_value = 
&ui->params.output_min_coordinate.x; + ui->scale_bars[0][SB_LATERAL].max_value = &ui->params.output_max_coordinate.x; + ui->scale_bars[0][SB_AXIAL].min_value = &ui->params.output_min_coordinate.z; + ui->scale_bars[0][SB_AXIAL].max_value = &ui->params.output_max_coordinate.z; ui->scale_bars[0][SB_LATERAL].scroll_both = 1; ui->scale_bars[0][SB_AXIAL].scroll_both = 0; @@ -1142,10 +1113,24 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw //end_temp_arena(ui->frame_temporary_arena); //ui->frame_temporary_arena = begin_temp_arena(&ui->arena_for_frame); + /* TODO(rnp): there should be a better way of detecting this */ + if (ui->read_params) { + mem_copy(&ctx->params->raw.output_min_coordinate, &ui->params, sizeof(ui->params)); + ui->flush_params = 0; + ui->read_params = 0; + } + /* NOTE: process interactions first because the user interacted with * the ui that was presented last frame */ ui_interact(ctx, input); + if (ui->flush_params && !ctx->csctx.processing_compute) { + mem_copy(&ui->params, &ctx->params->raw.output_min_coordinate, sizeof(ui->params)); + ui->flush_params = 0; + ctx->params->upload = 1; + ctx->start_compute = 1; + } + BeginDrawing(); ClearBackground(colour_from_normalized(BG_COLOUR)); @@ -1157,8 +1142,8 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformFrame *frame_to_draw rr.pos.x = lr.pos.x + lr.size.w; draw_settings_ui(ctx, lr, mouse); - if (frame_to_draw->dim.w) + if (frame_to_draw->ready_to_present) draw_display_overlay(ctx, ui->arena_for_frame, mouse, rr, frame_to_draw); - draw_debug_overlay(ctx, ui->arena_for_frame, lr); + draw_debug_overlay(ctx, frame_to_draw, ui->arena_for_frame, lr); EndDrawing(); } diff --git a/util.h b/util.h @@ -47,8 +47,9 @@ #define ORONE(x) ((x)? (x) : 1) #define SIGN(x) ((x) < 0? 
-1 : 1) -#define MEGABYTE (1024ULL * 1024ULL) -#define GIGABYTE (1024ULL * 1024ULL * 1024ULL) +#define KB(a) ((a) << 10ULL) +#define MB(a) ((a) << 20ULL) +#define GB(a) ((a) << 30ULL) #define U32_MAX (0xFFFFFFFFUL) #define F32_INFINITY (__builtin_inff()) @@ -65,6 +66,7 @@ typedef uint32_t b32; typedef float f32; typedef double f64; typedef ptrdiff_t size; +typedef size_t usize; typedef ptrdiff_t iptr; typedef size_t uptr; @@ -156,36 +158,6 @@ typedef struct { #define INVALID_FILE (-1) typedef struct { - size filesize; - f64 timestamp; -} FileStats; -#define ERROR_FILE_STATS (FileStats){.filesize = -1} - -#define FILE_WATCH_CALLBACK_FN(name) b32 name(s8 path, iptr user_data, Arena tmp) -typedef FILE_WATCH_CALLBACK_FN(file_watch_callback); - -typedef struct { - iptr user_data; - u64 hash; - file_watch_callback *callback; -} FileWatch; - -typedef struct { - u64 hash; - iptr handle; - s8 name; - FileWatch file_watches[16]; - u32 file_watch_count; - Arena buffer; -} FileWatchDirectory; - -typedef struct { - FileWatchDirectory directory_watches[4]; - iptr handle; - u32 directory_watch_count; -} FileWatchContext; - -typedef struct { u8 *data; u32 widx; u32 cap; @@ -229,6 +201,40 @@ typedef struct { typedef struct Platform Platform; +typedef struct { + Arena arena; + iptr handle; + iptr window_handle; + iptr sync_handle; + iptr user_context; + b32 asleep; +} GLWorkerThreadContext; + +#define FILE_WATCH_CALLBACK_FN(name) b32 name(Platform *platform, s8 path, iptr user_data, Arena tmp) +typedef FILE_WATCH_CALLBACK_FN(file_watch_callback); + +typedef struct { + iptr user_data; + u64 hash; + file_watch_callback *callback; +} FileWatch; + +typedef struct { + u64 hash; + iptr handle; + s8 name; + /* TODO(rnp): just push these as a linked list */ + FileWatch file_watches[16]; + u32 file_watch_count; + Arena buffer; +} FileWatchDirectory; + +typedef struct { + FileWatchDirectory directory_watches[4]; + iptr handle; + u32 directory_watch_count; +} FileWatchContext; + #define 
PLATFORM_ALLOC_ARENA_FN(name) Arena name(Arena old, size capacity) typedef PLATFORM_ALLOC_ARENA_FN(platform_alloc_arena_fn); @@ -236,14 +242,23 @@ typedef PLATFORM_ALLOC_ARENA_FN(platform_alloc_arena_fn); file_watch_callback *callback, iptr user_data) typedef PLATFORM_ADD_FILE_WATCH_FN(platform_add_file_watch_fn); +#define PLATFORM_WAKE_WORKER_FN(name) void name(GLWorkerThreadContext *ctx) +typedef PLATFORM_WAKE_WORKER_FN(platform_wake_worker_fn); + #define PLATFORM_CLOSE_FN(name) void name(iptr file) typedef PLATFORM_CLOSE_FN(platform_close_fn); #define PLATFORM_OPEN_FOR_WRITE_FN(name) iptr name(c8 *fname) typedef PLATFORM_OPEN_FOR_WRITE_FN(platform_open_for_write_fn); -#define PLATFORM_READ_PIPE_FN(name) size name(iptr pipe, void *buf, size len) -typedef PLATFORM_READ_PIPE_FN(platform_read_pipe_fn); +#define PLATFORM_READ_WHOLE_FILE_FN(name) s8 name(Arena *arena, char *file) +typedef PLATFORM_READ_WHOLE_FILE_FN(platform_read_whole_file_fn); + +#define PLATFORM_READ_FILE_FN(name) size name(iptr file, void *buf, size len) +typedef PLATFORM_READ_FILE_FN(platform_read_file_fn); + +#define PLATFORM_WAKE_THREAD_FN(name) void name(iptr sync_handle) +typedef PLATFORM_WAKE_THREAD_FN(platform_wake_thread_fn); #define PLATFORM_WRITE_NEW_FILE_FN(name) b32 name(char *fname, s8 raw) typedef PLATFORM_WRITE_NEW_FILE_FN(platform_write_new_file_fn); @@ -251,13 +266,18 @@ typedef PLATFORM_WRITE_NEW_FILE_FN(platform_write_new_file_fn); #define PLATFORM_WRITE_FILE_FN(name) b32 name(iptr file, s8 raw) typedef PLATFORM_WRITE_FILE_FN(platform_write_file_fn); -#define PLATFORM_FNS \ - X(add_file_watch) \ - X(alloc_arena) \ - X(close) \ - X(open_for_write) \ - X(read_pipe) \ - X(write_new_file) \ +#define PLATFORM_THREAD_ENTRY_POINT_FN(name) iptr name(iptr _ctx) +typedef PLATFORM_THREAD_ENTRY_POINT_FN(platform_thread_entry_point_fn); + +#define PLATFORM_FNS \ + X(add_file_watch) \ + X(alloc_arena) \ + X(close) \ + X(open_for_write) \ + X(read_whole_file) \ + X(read_file) \ + 
X(wake_thread) \ + X(write_new_file) \ X(write_file) #define X(name) platform_ ## name ## _fn *name; @@ -265,18 +285,11 @@ struct Platform { PLATFORM_FNS FileWatchContext file_watch_context; iptr os_context; + iptr error_file_handle; + GLWorkerThreadContext compute_worker; }; #undef X -typedef struct { - b32 executable_reloaded; - b32 pipe_data_available; - iptr pipe_handle; - - v2 mouse; - v2 last_mouse; -} BeamformerInput; - #include "util.c" #endif /* _UTIL_H_ */