beamformer.c (25411B)
/* See LICENSE for license details. */
#include "beamformer.h"

static f32 dt_for_frame;
static f32 cycle_t;

static size
decoded_data_size(ComputeShaderCtx *cs)
{
	uv4  dim    = cs->dec_data_dim;
	size result = 2 * sizeof(f32) * dim.x * dim.y * dim.z;
	return result;
}

static uv3
make_valid_test_dim(uv3 in)
{
	uv3 result;
	result.x = MAX(in.x, 1);
	result.y = MAX(in.y, 1);
	result.z = MAX(in.z, 1);
	return result;
}

static BeamformFrameIterator
beamform_frame_iterator(BeamformerCtx *ctx)
{
	BeamformFrameIterator result;
	result.frames        = ctx->beamform_frames;
	result.offset        = ctx->displayed_frame_index;
	result.capacity      = ARRAY_COUNT(ctx->beamform_frames);
	result.cursor        = 0;
	result.needed_frames = ORONE(ctx->params->raw.output_points.w);
	return result;
}

static BeamformFrame *
frame_next(BeamformFrameIterator *bfi)
{
	BeamformFrame *result = 0;
	if (bfi->cursor != bfi->needed_frames) {
		u32 index = (bfi->offset - bfi->cursor++) % bfi->capacity;
		result    = bfi->frames + index;
	}
	return result;
}

static void
alloc_beamform_frame(GLParams *gp, BeamformFrame *out, uv3 out_dim, u32 frame_index, s8 name)
{
	glDeleteTextures(1, &out->texture);

	out->dim.x = CLAMP(round_down_power_of_2(ORONE(out_dim.x)), 1, gp->max_3d_texture_dim);
	out->dim.y = CLAMP(round_down_power_of_2(ORONE(out_dim.y)), 1, gp->max_3d_texture_dim);
	out->dim.z = CLAMP(round_down_power_of_2(ORONE(out_dim.z)), 1, gp->max_3d_texture_dim);

	/* NOTE: allocate storage for beamformed output data;
	 * this is shared between compute and fragment shaders */
	u32 max_dim = MAX(out->dim.x, MAX(out->dim.y, out->dim.z));
	out->mips   = ctz_u32(max_dim) + 1;

	/* TODO(rnp): arena?? */
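	/* NOTE: the label built below has the form "<name>[<frame_index>]",
	 * e.g. "Beamformed_Data[3]", so each frame's texture is identifiable
	 * in GL debug tooling */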
	u8 buf[256];
	Stream label = {.data = buf, .cap = ARRAY_COUNT(buf)};
	stream_append_s8(&label, name);
	stream_append_byte(&label, '[');
	stream_append_u64(&label, frame_index);
	stream_append_s8(&label, s8("]"));

	glCreateTextures(GL_TEXTURE_3D, 1, &out->texture);
	glTextureStorage3D(out->texture, out->mips, GL_RG32F, out->dim.x, out->dim.y, out->dim.z);
	LABEL_GL_OBJECT(GL_TEXTURE, out->texture, stream_to_s8(&label));
}

static void
alloc_output_image(BeamformerCtx *ctx, uv3 output_dim)
{
	uv3 try_dim = make_valid_test_dim(output_dim);
	if (!uv3_equal(try_dim, ctx->averaged_frame.dim)) {
		alloc_beamform_frame(&ctx->gl, &ctx->averaged_frame, try_dim, 0,
		                     s8("Beamformed_Averaged_Data"));
		uv3 odim = ctx->averaged_frame.dim;

		UnloadRenderTexture(ctx->fsctx.output);
		/* TODO: select odim.x vs odim.y */
		ctx->fsctx.output = LoadRenderTexture(odim.x, odim.z);
		LABEL_GL_OBJECT(GL_FRAMEBUFFER, ctx->fsctx.output.id, s8("Rendered_View"));
		GenTextureMipmaps(&ctx->fsctx.output.texture);
		//SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_ANISOTROPIC_8X);
		//SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_TRILINEAR);
		SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_BILINEAR);

		/* NOTE(rnp): work around raylib's janky texture sampling */
		i32 id = ctx->fsctx.output.texture.id;
		glTextureParameteri(id, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
		glTextureParameteri(id, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);

		f32 border_color[] = {0, 0, 0, 1};
		glTextureParameterfv(id, GL_TEXTURE_BORDER_COLOR, border_color);
	}
}

static void
alloc_shader_storage(BeamformerCtx *ctx, Arena a)
{
	ComputeShaderCtx     *cs = &ctx->csctx;
	BeamformerParameters *bp = &ctx->params->raw;
	uv4 dec_data_dim = bp->dec_data_dim;
	uv2 rf_raw_dim   = bp->rf_raw_dim;
	ctx->csctx.dec_data_dim = dec_data_dim;
	ctx->csctx.rf_raw_dim   = rf_raw_dim;
	size rf_raw_size     = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
	size rf_decoded_size = decoded_data_size(cs);

	glDeleteBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos);
	glCreateBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos);

	i32 storage_flags = GL_DYNAMIC_STORAGE_BIT;
	switch (ctx->gl.vendor_id) {
	case GL_VENDOR_AMD:
	case GL_VENDOR_ARM:
	case GL_VENDOR_INTEL:
		if (cs->raw_data_ssbo)
			glUnmapNamedBuffer(cs->raw_data_ssbo);
		storage_flags |= GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT;
	case GL_VENDOR_NVIDIA:
		/* NOTE: register_cuda_buffers will handle the updated ssbo */
		break;
	}

	size full_rf_buf_size = ARRAY_COUNT(cs->raw_data_fences) * rf_raw_size;
	glDeleteBuffers(1, &cs->raw_data_ssbo);
	glCreateBuffers(1, &cs->raw_data_ssbo);
	glNamedBufferStorage(cs->raw_data_ssbo, full_rf_buf_size, 0, storage_flags);
	LABEL_GL_OBJECT(GL_BUFFER, cs->raw_data_ssbo, s8("Raw_Data_SSBO"));

	Stream label = stream_alloc(&a, 256);
	stream_append_s8(&label, s8("RF_SSBO_"));
	u32 s_widx = label.widx;
	for (u32 i = 0; i < ARRAY_COUNT(cs->rf_data_ssbos); i++) {
		glNamedBufferStorage(cs->rf_data_ssbos[i], rf_decoded_size, 0, 0);
		stream_append_u64(&label, i);
		s8 rf_label = stream_to_s8(&label);
		LABEL_GL_OBJECT(GL_BUFFER, cs->rf_data_ssbos[i], rf_label);
		label.widx = s_widx;
	}

	i32 map_flags = GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT|GL_MAP_UNSYNCHRONIZED_BIT;
	switch (ctx->gl.vendor_id) {
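	/* NOTE: AMD/ARM/Intel write raw RF straight into the persistently mapped
	 * SSBO; NVIDIA instead keeps a separate CPU-side arena and hands the SSBOs
	 * to CUDA via register_cuda_buffers */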
	case GL_VENDOR_AMD:
	case GL_VENDOR_ARM:
	case GL_VENDOR_INTEL:
		cs->raw_data_arena.beg = glMapNamedBufferRange(cs->raw_data_ssbo, 0,
		                                               full_rf_buf_size, map_flags);
		break;
	case GL_VENDOR_NVIDIA:
		cs->raw_data_arena = ctx->platform.alloc_arena(cs->raw_data_arena, full_rf_buf_size);
		ctx->cuda_lib.register_cuda_buffers(cs->rf_data_ssbos, ARRAY_COUNT(cs->rf_data_ssbos),
		                                    cs->raw_data_ssbo);
		ctx->cuda_lib.init_cuda_configuration(bp->rf_raw_dim.E, bp->dec_data_dim.E,
		                                      bp->channel_mapping);
		break;
	}

	/* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */
	size hadamard_elements = dec_data_dim.z * dec_data_dim.z;
	i32 *hadamard = alloc(&a, i32, hadamard_elements);
	i32 *tmp      = alloc(&a, i32, hadamard_elements);
	fill_hadamard_transpose(hadamard, tmp, dec_data_dim.z);
	glDeleteTextures(1, &cs->hadamard_texture);
	glCreateTextures(GL_TEXTURE_2D, 1, &cs->hadamard_texture);
	glTextureStorage2D(cs->hadamard_texture, 1, GL_R8I, dec_data_dim.z, dec_data_dim.z);
	glTextureSubImage2D(cs->hadamard_texture, 0, 0, 0, dec_data_dim.z, dec_data_dim.z,
	                    GL_RED_INTEGER, GL_INT, hadamard);
	LABEL_GL_OBJECT(GL_TEXTURE, cs->hadamard_texture, s8("Hadamard_Matrix"));
}

static BeamformWork *
beamform_work_queue_pop(BeamformWorkQueue *q)
{
	BeamformWork *result = q->first;
	if (result) {
		switch (result->type) {
		case BW_FULL_COMPUTE:
		case BW_RECOMPUTE:
		case BW_PARTIAL_COMPUTE:
			/* NOTE: only one compute is allowed per frame */
			if (q->did_compute_this_frame) {
				result = 0;
			} else {
				q->compute_in_flight--;
				q->did_compute_this_frame = 1;
				ASSERT(q->compute_in_flight >= 0);
			}
			break;
		}
	}
	/* NOTE: only do this once we have determined if we are doing the work */
	if (result) {
		q->first = result->next;
		if (result == q->last) {
			ASSERT(result->next == 0);
			q->last = 0;
		}
	}

	return result;
}

static BeamformWork *
beamform_work_queue_push(BeamformerCtx *ctx, Arena *a, enum beamform_work work_type)
{
	/* TODO: we should have a sub arena specifically for this purpose */

	BeamformWorkQueue *q = &ctx->beamform_work_queue;
	ComputeShaderCtx *cs = &ctx->csctx;

	BeamformWork *result = q->next_free;
	if (result) q->next_free = result->next;
	else        result = alloc(a, typeof(*result), 1);

	if (result) {
		result->type = work_type;
		result->next = 0;

		switch (work_type) {
		case BW_FULL_COMPUTE:
			if (q->compute_in_flight >= ARRAY_COUNT(cs->raw_data_fences)) {
				result->next = q->next_free;
				q->next_free = result;
				result       = 0;
				break;
			}
			cs->raw_data_index++;
			if (cs->raw_data_index >= ARRAY_COUNT(cs->raw_data_fences))
				cs->raw_data_index = 0;
			/* FALLTHROUGH */
		case BW_RECOMPUTE: {
			i32 raw_index = cs->raw_data_index;
			result->compute_ctx.raw_data_ssbo_index = raw_index;
			/* NOTE: if this times out it means the command queue is more than 3
			 * frames behind. In that case we need to re-evaluate the buffer size */
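			/* NOTE: glClientWaitSync takes its timeout in nanoseconds, so the
			 * 10000 below only blocks ~10 us on the fence guarding the raw data
			 * slot we are about to reuse */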
			if (cs->raw_data_fences[raw_index]) {
				i32 result = glClientWaitSync(cs->raw_data_fences[raw_index], 0,
				                              10000);
				if (result == GL_TIMEOUT_EXPIRED) {
					//ASSERT(0);
				}
				glDeleteSync(cs->raw_data_fences[raw_index]);
				cs->raw_data_fences[raw_index] = NULL;
			}
			ctx->displayed_frame_index++;
			if (ctx->displayed_frame_index >= ARRAY_COUNT(ctx->beamform_frames))
				ctx->displayed_frame_index = 0;
			result->compute_ctx.frame      = ctx->beamform_frames + ctx->displayed_frame_index;
			result->compute_ctx.first_pass = 1;

			BeamformFrameIterator bfi = beamform_frame_iterator(ctx);
			for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi)) {
				uv3 try_dim = ctx->params->raw.output_points.xyz;
				if (!uv3_equal(frame->dim, try_dim)) {
					u32 index = (bfi.offset - bfi.cursor) % bfi.capacity;
					alloc_beamform_frame(&ctx->gl, frame, try_dim, index,
					                     s8("Beamformed_Data"));
				}
			}
		} /* FALLTHROUGH */
		case BW_PARTIAL_COMPUTE:
			q->compute_in_flight++;
		case BW_SAVE_FRAME:
		case BW_SEND_FRAME:
		case BW_SSBO_COPY:
			break;
		}

		if (result) {
			if (q->last) q->last = q->last->next = result;
			else         q->last = q->first = result;
		}
	}

	return result;
}

static void
export_frame(BeamformerCtx *ctx, iptr handle, BeamformFrame *frame)
{
	uv3 dim = frame->dim;
	size out_size = dim.x * dim.y * dim.z * 2 * sizeof(f32);
	ctx->export_buffer = ctx->platform.alloc_arena(ctx->export_buffer, out_size);
	glGetTextureImage(frame->texture, 0, GL_RG, GL_FLOAT, out_size, ctx->export_buffer.beg);
	s8 raw = {.len = out_size, .data = ctx->export_buffer.beg};
	if (!ctx->platform.write_file(handle, raw))
		TraceLog(LOG_WARNING, "failed to export frame\n");
	ctx->platform.close(handle);
}

static void
do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, f32 in_scale,
              u32 out_texture, uv3 out_data_dim)
{
	/* NOTE: zero output before summing */
	glClearTexImage(out_texture, 0, GL_RED, GL_FLOAT, 0);

	glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F);
	glUniform1f(cs->sum_prescale_id, in_scale);
	for (u32 i = 0; i < in_texture_count; i++) {
		glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
		glDispatchCompute(ORONE(out_data_dim.x / 32),
		                  ORONE(out_data_dim.y),
		                  ORONE(out_data_dim.z / 32));
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
	}
}

static void
do_beamform_shader(ComputeShaderCtx *cs, BeamformerParameters *bp, BeamformFrame *frame,
                   u32 rf_ssbo, iv3 dispatch_dim, iv3 compute_dim_offset, i32 compute_pass)
{
	glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, rf_ssbo);
	glUniform3iv(cs->volume_export_dim_offset_id, 1, compute_dim_offset.E);
	glUniform1i(cs->volume_export_pass_id, compute_pass);

	glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
	glDispatchCompute(ORONE(dispatch_dim.x / 32),
	                  ORONE(dispatch_dim.y),
	                  ORONE(dispatch_dim.z / 32));
}

static b32
do_partial_compute_step(BeamformerCtx *ctx, BeamformFrame *frame)
{
	ComputeShaderCtx  *cs = &ctx->csctx;
	PartialComputeCtx *pc = &ctx->partial_compute_ctx;

	b32 done = 0;

	/* NOTE: we start this elsewhere on the first dispatch so that we can include
	 * times such as decoding/demodulation/etc. */
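	/* NOTE: pc->timer_ids[0] and [1] are GL_TIMESTAMP queries bracketing the whole
	 * partial step; check_compute_timers() accumulates their difference into
	 * pc->runtime */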
	if (!pc->timer_active) {
		glQueryCounter(pc->timer_ids[0], GL_TIMESTAMP);
		pc->timer_active = 1;
	}

	glBeginQuery(GL_TIME_ELAPSED, cs->timer_ids[cs->timer_index][pc->shader]);
	cs->timer_active[cs->timer_index][pc->shader] = 1;

	glUseProgram(cs->programs[pc->shader]);

	/* NOTE: We must tile this otherwise GL will kill us for taking too long */
	/* TODO: this could be based on multiple dimensions */
	i32 dispatch_count = frame->dim.z / 32;
	iv3 dim_offset     = {.z = !!dispatch_count * 32 * pc->dispatch_index++};
	iv3 dispatch_dim   = {.x = frame->dim.x, .y = frame->dim.y, .z = 1};
	do_beamform_shader(cs, &ctx->params->raw, frame, pc->rf_data_ssbo, dispatch_dim, dim_offset, 1);

	if (pc->dispatch_index >= dispatch_count) {
		pc->dispatch_index = 0;
		done = 1;
	}

	glQueryCounter(pc->timer_ids[1], GL_TIMESTAMP);

	glEndQuery(GL_TIME_ELAPSED);

	return done;
}

static void
do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformFrame *frame, u32 raw_data_index,
                  enum compute_shaders shader)
{
	ComputeShaderCtx *csctx = &ctx->csctx;
	uv2  rf_raw_dim  = ctx->params->raw.rf_raw_dim;
	size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);

	glBeginQuery(GL_TIME_ELAPSED, csctx->timer_ids[csctx->timer_index][shader]);
	csctx->timer_active[csctx->timer_index][shader] = 1;

	glUseProgram(csctx->programs[shader]);

	u32 output_ssbo_idx = !csctx->last_output_ssbo_index;
	u32 input_ssbo_idx  = csctx->last_output_ssbo_index;

	switch (shader) {
	case CS_HADAMARD:
		glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo,
		                  raw_data_index * rf_raw_size, rf_raw_size);

		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
		glBindImageTexture(0, csctx->hadamard_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8I);
		glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
		                  ORONE(csctx->dec_data_dim.y / 32),
		                  ORONE(csctx->dec_data_dim.z));
		csctx->raw_data_fences[raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
		break;
	case CS_CUDA_DECODE:
		ctx->cuda_lib.cuda_decode(raw_data_index * rf_raw_size, output_ssbo_idx, 0);
		csctx->raw_data_fences[raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
		break;
	case CS_CUDA_HILBERT:
		ctx->cuda_lib.cuda_hilbert(input_ssbo_idx, output_ssbo_idx);
		csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
		break;
	case CS_DEMOD:
		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
		glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
		                  ORONE(csctx->dec_data_dim.y / 32),
		                  ORONE(csctx->dec_data_dim.z));
		csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
		break;
	case CS_MIN_MAX: {
		u32 texture = frame->texture;
		for (u32 i = 1; i < frame->mips; i++) {
			glBindImageTexture(0, texture, i - 1, GL_TRUE, 0, GL_READ_ONLY,  GL_RG32F);
			glBindImageTexture(1, texture, i - 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
			glUniform1i(csctx->mips_level_id, i);

			u32 width  = frame->dim.x >> i;
			u32 height = frame->dim.y >> i;
			u32 depth  = frame->dim.z >> i;
			glDispatchCompute(ORONE(width / 32), ORONE(height), ORONE(depth / 32));
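			/* NOTE: the next iteration reads the mip level this dispatch just
			 * wrote, so its image writes must be made visible first */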
			glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		}
	} break;
	case CS_DAS: {
		u32 rf_ssbo      = csctx->rf_data_ssbos[input_ssbo_idx];
		iv3 dispatch_dim = {.x = frame->dim.x, .y = frame->dim.y, .z = frame->dim.z};
		do_beamform_shader(csctx, &ctx->params->raw, frame, rf_ssbo, dispatch_dim, (iv3){0}, 0);
	} break;
	case CS_SUM: {
		u32 frame_count  = 0;
		u32 *in_textures = alloc(&arena, u32, MAX_BEAMFORMED_SAVED_FRAMES);
		BeamformFrameIterator bfi = beamform_frame_iterator(ctx);
		for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi))
			in_textures[frame_count++] = frame->texture;
		do_sum_shader(csctx, in_textures, frame_count, 1 / (f32)frame_count,
		              ctx->averaged_frame.texture, ctx->averaged_frame.dim);
	} break;
	default: ASSERT(0);
	}

	glEndQuery(GL_TIME_ELAPSED);
}

static BeamformFrame *
start_beamform_compute_work(BeamformWork *work, ComputeShaderCtx *cs, BeamformerParametersFull *bpf)
{
	BeamformFrame *result = work->compute_ctx.frame;
	if (bpf->upload) {
		glNamedBufferSubData(cs->shared_ubo, 0, sizeof(bpf->raw), &bpf->raw);
		bpf->upload = 0;
	}

	result->min_coordinate = bpf->raw.output_min_coordinate;
	result->max_coordinate = bpf->raw.output_max_coordinate;

	return result;
}

static void
do_beamform_work(BeamformerCtx *ctx, Arena *a)
{
	BeamformWorkQueue *q = &ctx->beamform_work_queue;
	BeamformWork *work   = beamform_work_queue_pop(q);
	ComputeShaderCtx *cs = &ctx->csctx;

	while (work) {
		switch (work->type) {
		case BW_PARTIAL_COMPUTE: {
			BeamformFrame *frame = work->compute_ctx.frame;

			if (work->compute_ctx.first_pass) {
				start_beamform_compute_work(work, cs, ctx->params);

				PartialComputeCtx *pc = &ctx->partial_compute_ctx;
				pc->runtime      = 0;
				pc->timer_active = 1;
				glQueryCounter(pc->timer_ids[0], GL_TIMESTAMP);
				glDeleteBuffers(1, &pc->rf_data_ssbo);
				glCreateBuffers(1, &pc->rf_data_ssbo);
				glNamedBufferStorage(pc->rf_data_ssbo, decoded_data_size(cs), 0, 0);
				LABEL_GL_OBJECT(GL_BUFFER, pc->rf_data_ssbo, s8("Volume_RF_SSBO"));

				/* TODO: maybe we should have some concept of compute shader
				 * groups, then we could define a group that does the decoding
				 * and filtering and apply that group directly here. For now
				 * we will do this dumb thing */
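				/* NOTE: run every stage preceding CS_DAS (decoding,
				 * demodulation, etc.) once, then snapshot the decoded RF into
				 * the volume's dedicated SSBO below; later partial passes
				 * re-dispatch only the DAS shader */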
				u32 stage_count = ctx->params->compute_stages_count;
				enum compute_shaders *stages = ctx->params->compute_stages;
				for (u32 i = 0; i < stage_count; i++) {
					if (stages[i] == CS_DAS) {
						ctx->partial_compute_ctx.shader = stages[i];
						break;
					}
					do_compute_shader(ctx, *a, frame,
					                  work->compute_ctx.raw_data_ssbo_index,
					                  stages[i]);
				}
				u32 output_ssbo = pc->rf_data_ssbo;
				u32 input_ssbo  = cs->rf_data_ssbos[cs->last_output_ssbo_index];
				size rf_size    = decoded_data_size(cs);
				glCopyNamedBufferSubData(input_ssbo, output_ssbo, 0, 0, rf_size);
			}

			b32 done = do_partial_compute_step(ctx, frame);
			if (!done) {
				BeamformWork *new;
				/* NOTE: this push must not fail */
				new = beamform_work_queue_push(ctx, a, BW_PARTIAL_COMPUTE);
				new->compute_ctx.first_pass    = 0;
				new->compute_ctx.frame         = frame;
				new->compute_ctx.export_handle = work->compute_ctx.export_handle;
			} else if (work->compute_ctx.export_handle != INVALID_FILE) {
				export_frame(ctx, work->compute_ctx.export_handle, frame);
				work->compute_ctx.export_handle = INVALID_FILE;
				/* NOTE: do not waste a bunch of GPU space holding onto the volume
				 * texture if it was just for export */
				glDeleteTextures(1, &frame->texture);
				mem_clear(frame, 0, sizeof(*frame));
			}
		} break;
		case BW_FULL_COMPUTE:
		case BW_RECOMPUTE: {
			BeamformFrame *frame = start_beamform_compute_work(work, cs, ctx->params);

			u32 stage_count = ctx->params->compute_stages_count;
			enum compute_shaders *stages = ctx->params->compute_stages;
			for (u32 i = 0; i < stage_count; i++)
				do_compute_shader(ctx, *a, frame, work->compute_ctx.raw_data_ssbo_index,
				                  stages[i]);

			if (work->compute_ctx.export_handle != INVALID_FILE) {
				export_frame(ctx, work->compute_ctx.export_handle, frame);
				work->compute_ctx.export_handle = INVALID_FILE;
			}

			ctx->fsctx.gen_mipmaps = 1;
		} break;
		}

		work->next   = q->next_free;
		q->next_free = work;
		work = beamform_work_queue_pop(q);
	}

	if (q->did_compute_this_frame) {
		u32 tidx = ctx->csctx.timer_index;
		glDeleteSync(ctx->csctx.timer_fences[tidx]);
		ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences);
	}
}

static void
check_compute_timers(ComputeShaderCtx *cs, PartialComputeCtx *pc, BeamformerParametersFull *bp)
{
	/* NOTE: volume generation running timer */
	if (pc->timer_active) {
		u64 start_ns = 0, end_ns = 0;
		glGetQueryObjectui64v(pc->timer_ids[0], GL_QUERY_RESULT, &start_ns);
		glGetQueryObjectui64v(pc->timer_ids[1], GL_QUERY_RESULT, &end_ns);
		u64 elapsed_ns = end_ns - start_ns;
		pc->runtime += (f32)elapsed_ns * 1e-9;
		pc->timer_active = 0;
	}

	/* NOTE: main timers for display portion of the program */
	u32 last_idx = (cs->timer_index - 1) % ARRAY_COUNT(cs->timer_fences);
	if (!cs->timer_fences[last_idx])
		return;

	i32 status = glClientWaitSync(cs->timer_fences[last_idx], 0, 0);
	if (status == GL_TIMEOUT_EXPIRED || status == GL_WAIT_FAILED)
		return;
	glDeleteSync(cs->timer_fences[last_idx]);
	cs->timer_fences[last_idx] = NULL;

	for (u32 i = 0; i < bp->compute_stages_count; i++) {
		u64 ns  = 0;
		i32 idx = bp->compute_stages[i];
		if (cs->timer_active[last_idx][idx]) {
			glGetQueryObjectui64v(cs->timer_ids[last_idx][idx], GL_QUERY_RESULT, &ns);
			cs->timer_active[last_idx][idx] = 0;
		}
		cs->last_frame_time[idx] = (f32)ns / 1e9;
	}
}

#include "ui.c"

DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
{
	dt_for_frame = GetFrameTime();

	cycle_t += dt_for_frame;
	if (cycle_t > 1) cycle_t -= 1;
	glProgramUniform1f(ctx->csctx.programs[CS_DAS], ctx->csctx.cycle_t_id, cycle_t);

	if (IsWindowResized()) {
		ctx->window_size.h = GetScreenHeight();
		ctx->window_size.w = GetScreenWidth();
	}

	if (input->executable_reloaded) {
		ui_init(ctx, ctx->ui_backing_store);
	}

	if (ctx->flags & START_COMPUTE) {
		if (ui_can_start_compute(ctx))
			ui_start_compute(ctx);
		ctx->flags &= ~START_COMPUTE;
	}

	/* NOTE: Store the compute time for the last frame. */
	check_compute_timers(&ctx->csctx, &ctx->partial_compute_ctx, ctx->params);

	BeamformerParameters *bp = &ctx->params->raw;
	/* NOTE: Check for and Load RF Data into GPU */
	if (input->pipe_data_available) {
		BeamformWork *work = beamform_work_queue_push(ctx, arena, BW_FULL_COMPUTE);
		/* NOTE: we can only read in the new data if we get back a work item.
		 * otherwise we have too many frames in flight and should wait until the
		 * next frame to try again */
		if (work) {
			ComputeShaderCtx *cs = &ctx->csctx;
			if (!uv4_equal(cs->dec_data_dim, bp->dec_data_dim)) {
				alloc_shader_storage(ctx, *arena);
				/* TODO: we may need to invalidate all queue items here */
			}

			if (ctx->params->export_next_frame) {
				/* TODO: we don't really want the beamformer opening/closing files */
				iptr f = ctx->platform.open_for_write(ctx->params->export_pipe_name);
				work->compute_ctx.export_handle = f;
				ctx->params->export_next_frame  = 0;
			} else {
				work->compute_ctx.export_handle = INVALID_FILE;
			}

			b32 output_3d = bp->output_points.x > 1 && bp->output_points.y > 1 &&
			                bp->output_points.z > 1;

			if (output_3d) {
				work->type = BW_PARTIAL_COMPUTE;
				BeamformFrame *frame = &ctx->partial_compute_ctx.frame;
				uv3 out_dim = ctx->params->raw.output_points.xyz;
				alloc_beamform_frame(&ctx->gl, frame, out_dim, 0, s8("Beamformed_Volume"));
				work->compute_ctx.frame = frame;
			}

			u32  raw_index    = work->compute_ctx.raw_data_ssbo_index;
			uv2  rf_raw_dim   = cs->rf_raw_dim;
			size rf_raw_size  = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
			void *rf_data_buf = cs->raw_data_arena.beg + raw_index * rf_raw_size;

			alloc_output_image(ctx, bp->output_points.xyz);

			size rlen = ctx->platform.read_pipe(input->pipe_handle, rf_data_buf, rf_raw_size);
			if (rlen != rf_raw_size) {
				stream_append_s8(&ctx->error_stream, s8("Partial Read Occurred: "));
				stream_append_i64(&ctx->error_stream, rlen);
				stream_append_byte(&ctx->error_stream, '/');
				stream_append_i64(&ctx->error_stream, rf_raw_size);
				stream_append_s8(&ctx->error_stream, s8("\n\0"));
				TraceLog(LOG_WARNING, (c8 *)stream_to_s8(&ctx->error_stream).data);
				ctx->error_stream.widx = 0;
			} else {
				switch (ctx->gl.vendor_id) {
				case GL_VENDOR_AMD:
				case GL_VENDOR_ARM:
				case GL_VENDOR_INTEL:
					break;
				case GL_VENDOR_NVIDIA:
					glNamedBufferSubData(cs->raw_data_ssbo, raw_index * rlen,
					                     rlen, rf_data_buf);
				}
			}
		}
	}

	ctx->beamform_work_queue.did_compute_this_frame = 0;
	do_beamform_work(ctx, arena);

	/* NOTE: draw output image texture using render fragment shader */
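	/* NOTE: when frame averaging is enabled (output_points.w > 1) draw the averaged
	 * frame, otherwise draw the most recently beamformed frame */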
	BeamformFrame *frame_to_draw = 0;
	BeginTextureMode(ctx->fsctx.output);
	ClearBackground(PINK);
	BeginShaderMode(ctx->fsctx.shader);
	FragmentShaderCtx *fs = &ctx->fsctx;
	glUseProgram(fs->shader.id);
	u32 out_texture = 0;
	if (bp->output_points.w > 1) {
		frame_to_draw = &ctx->averaged_frame;
		out_texture   = ctx->averaged_frame.texture;
	} else {
		frame_to_draw = ctx->beamform_frames + ctx->displayed_frame_index;
		out_texture   = frame_to_draw->texture;
	}
	glBindTextureUnit(0, out_texture);
	glUniform1f(fs->db_cutoff_id, fs->db);
	glUniform1f(fs->threshold_id, fs->threshold);
	DrawTexture(fs->output.texture, 0, 0, WHITE);
	EndShaderMode();
	EndTextureMode();

	/* NOTE: regenerate mipmaps only when the output has actually changed */
	if (ctx->fsctx.gen_mipmaps) {
		/* NOTE: shut up raylib's reporting on mipmap gen */
		SetTraceLogLevel(LOG_NONE);
		GenTextureMipmaps(&ctx->fsctx.output.texture);
		SetTraceLogLevel(LOG_INFO);
		ctx->fsctx.gen_mipmaps = 0;
	}

	draw_ui(ctx, input, frame_to_draw);

	if (WindowShouldClose())
		ctx->flags |= SHOULD_EXIT;
}