beamformer.c (26848B)
/* See LICENSE for license details. */
#include "beamformer.h"

static f32 dt_for_frame;
static f32 cycle_t;

static size
decoded_data_size(ComputeShaderCtx *cs)
{
	/* NOTE: decoded data is stored as interleaved I/Q pairs (2 f32s per sample) */
	uv4  dim    = cs->dec_data_dim;
	size result = 2 * sizeof(f32) * dim.x * dim.y * dim.z;
	return result;
}

static uv4
make_valid_test_dim(uv4 in)
{
	uv4 result;
	result.x = MAX(in.x, 1);
	result.y = MAX(in.y, 1);
	result.z = MAX(in.z, 1);
	result.w = 1;
	return result;
}

static BeamformFrameIterator
beamform_frame_iterator(BeamformerCtx *ctx)
{
	BeamformFrameIterator result;
	result.frames        = ctx->beamform_frames;
	result.offset        = ctx->displayed_frame_index;
	result.capacity      = ARRAY_COUNT(ctx->beamform_frames);
	result.cursor        = 0;
	result.needed_frames = ORONE(ctx->params->raw.output_points.w);
	return result;
}

static BeamformFrame *
frame_next(BeamformFrameIterator *bfi)
{
	BeamformFrame *result = 0;
	if (bfi->cursor != bfi->needed_frames) {
		/* NOTE: add capacity before subtracting so the unsigned index can't
		 * wrap below zero when cursor exceeds offset */
		u32 index = (bfi->offset + bfi->capacity - bfi->cursor++) % bfi->capacity;
		result    = bfi->frames + index;
	}
	return result;
}

static void
alloc_beamform_frame(GLParams *gp, BeamformFrame *out, uv4 out_dim, u32 frame_index, s8 name)
{
	glDeleteTextures(out->dim.w, out->textures);

	out->dim.x = CLAMP(round_down_power_of_2(ORONE(out_dim.x)), 1, gp->max_3d_texture_dim);
	out->dim.y = CLAMP(round_down_power_of_2(ORONE(out_dim.y)), 1, gp->max_3d_texture_dim);
	out->dim.z = CLAMP(round_down_power_of_2(ORONE(out_dim.z)), 1, gp->max_3d_texture_dim);
	out->dim.w = CLAMP(out_dim.w, 0, MAX_MULTI_XDC_COUNT);

	/* NOTE: allocate storage for beamformed output data;
	 * this is shared between compute and fragment shaders */
	u32 max_dim = MAX(out->dim.x, MAX(out->dim.y, out->dim.z));
	out->mips   = ctz_u32(max_dim) + 1;

	u8 buf[256];
	Stream label = {.data = buf, .cap = ARRAY_COUNT(buf)};
	stream_append_s8(&label, name);
	stream_append_byte(&label, '[');
	stream_append_u64(&label, frame_index);
	stream_append_s8(&label, s8("]["));
	u32 sidx = label.widx;

	glCreateTextures(GL_TEXTURE_3D, out->dim.w, out->textures);
	for (u32 i = 0; i < out->dim.w; i++) {
		glTextureStorage3D(out->textures[i], out->mips, GL_RG32F,
		                   out->dim.x, out->dim.y, out->dim.z);
		stream_append_u64(&label, i);
		stream_append_byte(&label, ']');
		LABEL_GL_OBJECT(GL_TEXTURE, out->textures[i], stream_to_s8(&label));
		label.widx = sidx;
	}
}

static void
alloc_output_image(BeamformerCtx *ctx, uv4 output_dim)
{
	uv4 try_dim = make_valid_test_dim(output_dim);
	if (!uv4_equal(try_dim, ctx->averaged_frame.dim)) {
		alloc_beamform_frame(&ctx->gl, &ctx->averaged_frame, try_dim, 0,
		                     s8("Beamformed_Averaged_Data"));
		uv4 odim = ctx->averaged_frame.dim;

		UnloadRenderTexture(ctx->fsctx.output);
		/* TODO: select odim.x vs odim.y */
		ctx->fsctx.output = LoadRenderTexture(odim.x, odim.z);
		LABEL_GL_OBJECT(GL_FRAMEBUFFER, ctx->fsctx.output.id, s8("Rendered_View"));
		GenTextureMipmaps(&ctx->fsctx.output.texture);
		//SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_ANISOTROPIC_8X);
		//SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_TRILINEAR);
		SetTextureFilter(ctx->fsctx.output.texture, TEXTURE_FILTER_BILINEAR);
	}
}
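
/* NOTE: raw RF data is kept in a single SSBO treated as a ring of
 * ARRAY_COUNT(cs->raw_data_fences) fixed-size slots, each holding one frame
 * of rf_raw_dim.x * rf_raw_dim.y i16 samples. On AMD/ARM/Intel the buffer is
 * persistently mapped and the pipe reader writes into it directly; on NVIDIA
 * the data lands in a CPU side arena first and is uploaded with
 * glNamedBufferSubData() so that the CUDA decoder can share the buffer. */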

static void
alloc_shader_storage(BeamformerCtx *ctx, Arena a)
{
	ComputeShaderCtx *cs     = &ctx->csctx;
	BeamformerParameters *bp = &ctx->params->raw;
	uv4 dec_data_dim = bp->dec_data_dim;
	uv2 rf_raw_dim   = bp->rf_raw_dim;
	ctx->csctx.dec_data_dim = dec_data_dim;
	ctx->csctx.rf_raw_dim   = rf_raw_dim;
	size rf_raw_size     = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
	size rf_decoded_size = decoded_data_size(cs);

	glDeleteBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos);
	glCreateBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos);

	i32 storage_flags = GL_DYNAMIC_STORAGE_BIT;
	switch (ctx->gl.vendor_id) {
	case GL_VENDOR_AMD:
	case GL_VENDOR_ARM:
	case GL_VENDOR_INTEL:
		if (cs->raw_data_ssbo)
			glUnmapNamedBuffer(cs->raw_data_ssbo);
		storage_flags |= GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT;
		/* FALLTHROUGH */
	case GL_VENDOR_NVIDIA:
		/* NOTE: register_cuda_buffers will handle the updated ssbo */
		break;
	}

	size full_rf_buf_size = ARRAY_COUNT(cs->raw_data_fences) * rf_raw_size;
	glDeleteBuffers(1, &cs->raw_data_ssbo);
	glCreateBuffers(1, &cs->raw_data_ssbo);
	glNamedBufferStorage(cs->raw_data_ssbo, full_rf_buf_size, 0, storage_flags);
	LABEL_GL_OBJECT(GL_BUFFER, cs->raw_data_ssbo, s8("Raw_Data_SSBO"));

	Stream label = stream_alloc(&a, 256);
	stream_append_s8(&label, s8("RF_SSBO_"));
	u32 s_widx = label.widx;
	for (u32 i = 0; i < ARRAY_COUNT(cs->rf_data_ssbos); i++) {
		glNamedBufferStorage(cs->rf_data_ssbos[i], rf_decoded_size, 0, 0);
		stream_append_u64(&label, i);
		s8 rf_label = stream_to_s8(&label);
		LABEL_GL_OBJECT(GL_BUFFER, cs->rf_data_ssbos[i], rf_label);
		label.widx = s_widx;
	}

	i32 map_flags = GL_MAP_WRITE_BIT|GL_MAP_PERSISTENT_BIT|GL_MAP_UNSYNCHRONIZED_BIT;
	switch (ctx->gl.vendor_id) {
	case GL_VENDOR_AMD:
	case GL_VENDOR_ARM:
	case GL_VENDOR_INTEL:
		cs->raw_data_arena.beg = glMapNamedBufferRange(cs->raw_data_ssbo, 0,
		                                               full_rf_buf_size, map_flags);
		break;
	case GL_VENDOR_NVIDIA:
		cs->raw_data_arena = ctx->platform.alloc_arena(cs->raw_data_arena, full_rf_buf_size);
		ctx->cuda_lib.register_cuda_buffers(cs->rf_data_ssbos, ARRAY_COUNT(cs->rf_data_ssbos),
		                                    cs->raw_data_ssbo);
		ctx->cuda_lib.init_cuda_configuration(bp->rf_raw_dim.E, bp->dec_data_dim.E,
		                                      bp->channel_mapping);
		break;
	}

	/* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */
	cs->hadamard_dim       = (uv2){.x = dec_data_dim.z, .y = dec_data_dim.z};
	size hadamard_elements = dec_data_dim.z * dec_data_dim.z;
	i32 *hadamard = alloc(&a, i32, hadamard_elements);
	i32 *tmp      = alloc(&a, i32, hadamard_elements);
	fill_hadamard_transpose(hadamard, tmp, dec_data_dim.z);
	glDeleteBuffers(1, &cs->hadamard_ssbo);
	glCreateBuffers(1, &cs->hadamard_ssbo);
	glNamedBufferStorage(cs->hadamard_ssbo, hadamard_elements * sizeof(i32), hadamard, 0);
	LABEL_GL_OBJECT(GL_BUFFER, cs->hadamard_ssbo, s8("Hadamard_SSBO"));
}
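
/* NOTE: fill_hadamard_transpose() is defined elsewhere in the project; the
 * sketch below (kept out of the build) shows the Sylvester doubling it is
 * assumed to follow, without the tmp scratch buffer the real version takes:
 *     H(1) = [1],  H(2n) = | H(n)  H(n) |
 *                          | H(n) -H(n) |
 * The doubling construction only covers power-of-two sizes, so dec_data_dim.z
 * must be chosen accordingly. */
#if 0
static void
example_fill_hadamard(i32 *out, u32 dim)
{
	/* NOTE: grow an n x n block in the top-left corner of the dim x dim
	 * row-major matrix until it fills the whole thing */
	out[0] = 1;
	for (u32 n = 1; n < dim; n *= 2) {
		for (u32 r = 0; r < n; r++) {
			for (u32 c = 0; c < n; c++) {
				i32 v = out[r * dim + c];
				out[r * dim + (c + n)]       =  v;
				out[(r + n) * dim + c]       =  v;
				out[(r + n) * dim + (c + n)] = -v;
			}
		}
	}
	/* NOTE: Sylvester matrices are symmetric so the transpose is free */
}
#endif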

static BeamformWork *
beamform_work_queue_pop(BeamformWorkQueue *q)
{
	BeamformWork *result = q->first;
	if (result) {
		switch (result->type) {
		case BW_FULL_COMPUTE:
		case BW_RECOMPUTE:
		case BW_PARTIAL_COMPUTE:
			/* NOTE: only one compute is allowed per frame */
			if (q->did_compute_this_frame) {
				result = 0;
			} else {
				q->compute_in_flight--;
				q->did_compute_this_frame = 1;
				ASSERT(q->compute_in_flight >= 0);
			}
			break;
		}
	}
	/* NOTE: only do this once we have determined if we are doing the work */
	if (result) {
		q->first = result->next;
		if (result == q->last) {
			ASSERT(result->next == 0);
			q->last = 0;
		}
	}

	return result;
}

static BeamformWork *
beamform_work_queue_push(BeamformerCtx *ctx, Arena *a, enum beamform_work work_type)
{
	/* TODO: we should have a sub arena specifically for this purpose */

	BeamformWorkQueue *q  = &ctx->beamform_work_queue;
	ComputeShaderCtx  *cs = &ctx->csctx;

	BeamformWork *result = q->next_free;
	if (result) q->next_free = result->next;
	else        result = alloc(a, typeof(*result), 1);

	if (result) {
		result->type = work_type;
		result->next = 0;

		switch (work_type) {
		case BW_FULL_COMPUTE:
			if (q->compute_in_flight >= ARRAY_COUNT(cs->raw_data_fences)) {
				result->next = q->next_free;
				q->next_free = result;
				result = 0;
				break;
			}
			cs->raw_data_index++;
			if (cs->raw_data_index >= ARRAY_COUNT(cs->raw_data_fences))
				cs->raw_data_index = 0;
			/* FALLTHROUGH */
		case BW_RECOMPUTE: {
			i32 raw_index = cs->raw_data_index;
			result->compute_ctx.raw_data_ssbo_index = raw_index;
			/* NOTE: if this times out it means the command queue is more than 3
			 * frames behind. In that case we need to re-evaluate the buffer size */
			if (cs->raw_data_fences[raw_index]) {
				/* NOTE: named sync_status to avoid shadowing the queue item */
				i32 sync_status = glClientWaitSync(cs->raw_data_fences[raw_index], 0,
				                                   10000);
				if (sync_status == GL_TIMEOUT_EXPIRED) {
					//ASSERT(0);
				}
				glDeleteSync(cs->raw_data_fences[raw_index]);
				cs->raw_data_fences[raw_index] = NULL;
			}
			ctx->displayed_frame_index++;
			if (ctx->displayed_frame_index >= ARRAY_COUNT(ctx->beamform_frames))
				ctx->displayed_frame_index = 0;
			result->compute_ctx.frame      = ctx->beamform_frames + ctx->displayed_frame_index;
			result->compute_ctx.first_pass = 1;

			BeamformFrameIterator bfi = beamform_frame_iterator(ctx);
			for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi)) {
				uv4 try_dim = ctx->params->raw.output_points;
				try_dim.w   = ctx->params->raw.xdc_count;
				if (!uv4_equal(frame->dim, try_dim)) {
					/* NOTE: frame_next() already advanced the cursor so back
					 * it up by one to label the frame we were just handed */
					u32 index = (bfi.offset + bfi.capacity - (bfi.cursor - 1))
					            % bfi.capacity;
					alloc_beamform_frame(&ctx->gl, frame, try_dim, index,
					                     s8("Beamformed_Data"));
				}
			}
		} /* FALLTHROUGH */
		case BW_PARTIAL_COMPUTE:
			q->compute_in_flight++;
			/* FALLTHROUGH */
		case BW_SAVE_FRAME:
		case BW_SEND_FRAME:
		case BW_SSBO_COPY:
			break;
		}

		if (result) {
			if (q->last) q->last = q->last->next = result;
			else         q->last = q->first      = result;
		}
	}

	return result;
}

static m4
v3_to_xdc_space(v3 direction, v3 origin, v3 corner1, v3 corner2)
{
	v3 edge1      = sub_v3(corner1, origin);
	v3 edge2      = sub_v3(corner2, origin);
	v3 xdc_normal = cross(edge1, edge2);
	if (xdc_normal.z < 0)
		xdc_normal = cross(edge2, edge1);
	ASSERT(xdc_normal.z >= 0);

	v3 e1 = normalize_v3(sub_v3(direction, xdc_normal));
	v3 e2 = {.y = 1};
	v3 e3 = normalize_v3(cross(e2, e1));
	v4 e4 = {.x = -origin.x, .y = -origin.y, .z = -origin.z, .w = 1};

	m4 result = {
		.c[0] = (v4){.x = e3.x, .y = e2.x, .z = e1.x, .w = 0},
		.c[1] = (v4){.x = e3.y, .y = e2.y, .z = e1.y, .w = 0},
		.c[2] = (v4){.x = e3.z, .y = e2.z, .z = e1.z, .w = 0},
		.c[3] = e4,
	};

	return result;
}
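
/* NOTE: the columns above are laid out so that, once glUniformMatrix4fv()
 * uploads them column-major (transpose == GL_FALSE), the shader-side matrix
 * has rows e3/e2/e1: e2 is the fixed lateral (y) axis, e1 is derived from the
 * view direction and the plane normal, e3 completes the set, and the last
 * column shifts the result by -origin. In effect a world-space point is
 * re-expressed relative to the transducer (XDC) plane. */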

static v4
f32_4_to_v4(f32 *in)
{
	v4 result;
	store_f32x4(load_f32x4(in), result.E);
	return result;
}

static void
export_frame(BeamformerCtx *ctx, iptr handle, BeamformFrame *frame)
{
	uv3  dim      = frame->dim.xyz;
	size out_size = dim.x * dim.y * dim.z * 2 * sizeof(f32);
	ctx->export_buffer = ctx->platform.alloc_arena(ctx->export_buffer, out_size);
	u32 texture = frame->textures[frame->dim.w - 1];
	glGetTextureImage(texture, 0, GL_RG, GL_FLOAT, out_size, ctx->export_buffer.beg);
	s8 raw = {.len = out_size, .data = ctx->export_buffer.beg};
	if (!ctx->platform.write_file(handle, raw))
		TraceLog(LOG_WARNING, "failed to export frame\n");
	ctx->platform.close(handle);
}

static void
do_sum_shader(ComputeShaderCtx *cs, u32 *in_textures, u32 in_texture_count, f32 in_scale,
              u32 out_texture, uv4 out_data_dim)
{
	/* NOTE: zero output before summing */
	glClearTexImage(out_texture, 0, GL_RED, GL_FLOAT, 0);

	glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F);
	glUniform1f(cs->sum_prescale_id, in_scale);
	for (u32 i = 0; i < in_texture_count; i++) {
		glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
		glDispatchCompute(ORONE(out_data_dim.x / 32),
		                  ORONE(out_data_dim.y),
		                  ORONE(out_data_dim.z / 32));
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
	}
}

static void
do_beamform_shader(ComputeShaderCtx *cs, BeamformerParameters *bp, BeamformFrame *frame,
                   u32 rf_ssbo, iv3 dispatch_dim, iv3 compute_dim_offset, i32 compute_pass)
{
	glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, rf_ssbo);
	glUniform3iv(cs->volume_export_dim_offset_id, 1, compute_dim_offset.E);
	glUniform1i(cs->volume_export_pass_id, compute_pass);

	for (u32 i = 0; i < frame->dim.w; i++) {
		u32 texture = frame->textures[i];
		m4 xdc_transform = v3_to_xdc_space((v3){.z = 1},
		                                   f32_4_to_v4(bp->xdc_origin  + (4 * i)).xyz,
		                                   f32_4_to_v4(bp->xdc_corner1 + (4 * i)).xyz,
		                                   f32_4_to_v4(bp->xdc_corner2 + (4 * i)).xyz);
		glBindImageTexture(0, texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
		glUniform1i(cs->xdc_index_id, i);
		glUniformMatrix4fv(cs->xdc_transform_id, 1, GL_FALSE, xdc_transform.E);
		glDispatchCompute(ORONE(dispatch_dim.x / 32),
		                  ORONE(dispatch_dim.y),
		                  ORONE(dispatch_dim.z / 32));
	}
}
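
/* NOTE: the ORONE(x / 32) arithmetic in the dispatches above assumes the
 * shaders being run declare local_size_x = 32, local_size_y = 1,
 * local_size_z = 32 (the GLSL sources live outside this file); ORONE() keeps
 * a dispatch dimension from dropping to zero when the output is smaller than
 * one tile. */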

static b32
do_partial_compute_step(BeamformerCtx *ctx, BeamformFrame *frame)
{
	ComputeShaderCtx  *cs = &ctx->csctx;
	PartialComputeCtx *pc = &ctx->partial_compute_ctx;

	b32 done = 0;

	/* NOTE: we start this elsewhere on the first dispatch so that we can include
	 * times such as decoding/demodulation/etc. */
	if (!pc->timer_active) {
		glQueryCounter(pc->timer_ids[0], GL_TIMESTAMP);
		pc->timer_active = 1;
	}

	glBeginQuery(GL_TIME_ELAPSED, cs->timer_ids[cs->timer_index][pc->shader]);
	cs->timer_active[cs->timer_index][pc->shader] = 1;

	glUseProgram(cs->programs[pc->shader]);

	/* NOTE: We must tile this otherwise GL will kill us for taking too long */
	/* TODO: this could be based on multiple dimensions */
	i32 dispatch_count = frame->dim.z / 32;
	iv3 dim_offset     = {.z = !!dispatch_count * 32 * pc->dispatch_index++};
	iv3 dispatch_dim   = {.x = frame->dim.x, .y = frame->dim.y, .z = 1};
	do_beamform_shader(cs, &ctx->params->raw, frame, pc->rf_data_ssbo, dispatch_dim, dim_offset, 1);

	if (pc->dispatch_index >= dispatch_count) {
		pc->dispatch_index = 0;
		done = 1;
	}

	glQueryCounter(pc->timer_ids[1], GL_TIMESTAMP);

	glEndQuery(GL_TIME_ELAPSED);

	return done;
}
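
/* NOTE: the decode/demodulate stages below ping-pong between the two
 * rf_data_ssbos: each stage reads the buffer named by last_output_ssbo_index,
 * writes its complement, then flips the flag so the next stage (and finally
 * CS_DAS) consumes the freshest data. */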

static void
do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformFrame *frame, u32 raw_data_index,
                  enum compute_shaders shader)
{
	ComputeShaderCtx *csctx = &ctx->csctx;
	uv2  rf_raw_dim  = ctx->params->raw.rf_raw_dim;
	size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);

	glBeginQuery(GL_TIME_ELAPSED, csctx->timer_ids[csctx->timer_index][shader]);
	csctx->timer_active[csctx->timer_index][shader] = 1;

	glUseProgram(csctx->programs[shader]);

	u32 output_ssbo_idx = !csctx->last_output_ssbo_index;
	u32 input_ssbo_idx  = csctx->last_output_ssbo_index;

	switch (shader) {
	case CS_HADAMARD:
		glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo,
		                  raw_data_index * rf_raw_size, rf_raw_size);
		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->hadamard_ssbo);
		glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
		                  ORONE(csctx->dec_data_dim.y / 32),
		                  ORONE(csctx->dec_data_dim.z));
		csctx->raw_data_fences[raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
		break;
	case CS_CUDA_DECODE:
		ctx->cuda_lib.cuda_decode(raw_data_index * rf_raw_size, output_ssbo_idx,
		                          ctx->params->raw.channel_offset);
		csctx->raw_data_fences[raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
		break;
	case CS_CUDA_HILBERT:
		ctx->cuda_lib.cuda_hilbert(input_ssbo_idx, output_ssbo_idx);
		csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
		break;
	case CS_DEMOD:
		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
		glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
		                  ORONE(csctx->dec_data_dim.y / 32),
		                  ORONE(csctx->dec_data_dim.z));
		csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
		break;
	case CS_MIN_MAX: {
		u32 texture = frame->textures[frame->dim.w - 1];
		for (u32 i = 1; i < frame->mips; i++) {
			glBindImageTexture(0, texture, i - 1, GL_TRUE, 0, GL_READ_ONLY,  GL_RG32F);
			glBindImageTexture(1, texture, i - 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
			glUniform1i(csctx->mips_level_id, i);

			u32 width  = frame->dim.x >> i;
			u32 height = frame->dim.y >> i;
			u32 depth  = frame->dim.z >> i;
			glDispatchCompute(ORONE(width / 32), ORONE(height), ORONE(depth / 32));
			glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		}
	} break;
	case CS_DAS: {
		u32 rf_ssbo      = csctx->rf_data_ssbos[input_ssbo_idx];
		iv3 dispatch_dim = {.x = frame->dim.x, .y = frame->dim.y, .z = frame->dim.z};
		do_beamform_shader(csctx, &ctx->params->raw, frame, rf_ssbo, dispatch_dim, (iv3){0}, 0);
		if (frame->dim.w > 1) {
			glUseProgram(csctx->programs[CS_SUM]);
			u32 input_texture_count = frame->dim.w - 1;
			do_sum_shader(csctx, frame->textures, input_texture_count,
			              1 / (f32)input_texture_count, frame->textures[frame->dim.w - 1],
			              frame->dim);
		}
	} break;
	case CS_SUM: {
		u32 frame_count  = 0;
		u32 *in_textures = alloc(&arena, u32, MAX_BEAMFORMED_SAVED_FRAMES);
		BeamformFrameIterator bfi = beamform_frame_iterator(ctx);
		for (BeamformFrame *frame = frame_next(&bfi); frame; frame = frame_next(&bfi)) {
			ASSERT(frame->dim.w);
			in_textures[frame_count++] = frame->textures[frame->dim.w - 1];
		}
		do_sum_shader(csctx, in_textures, frame_count, 1 / (f32)frame_count,
		              ctx->averaged_frame.textures[0], ctx->averaged_frame.dim);
	} break;
	default: ASSERT(0);
	}

	glEndQuery(GL_TIME_ELAPSED);
}
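
/* NOTE: do_beamform_work() drains the queue until it is empty or
 * beamform_work_queue_pop() refuses to hand out a second compute item for
 * this frame; finished items go back on the free list so queue nodes are
 * recycled rather than re-allocated from the arena. */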

static void
do_beamform_work(BeamformerCtx *ctx, Arena *a)
{
	BeamformerParameters *bp = &ctx->params->raw;
	BeamformWorkQueue *q     = &ctx->beamform_work_queue;
	BeamformWork      *work  = beamform_work_queue_pop(q);
	ComputeShaderCtx  *cs    = &ctx->csctx;

	while (work) {
		switch (work->type) {
		case BW_PARTIAL_COMPUTE: {
			BeamformFrame *frame = work->compute_ctx.frame;

			if (work->compute_ctx.first_pass) {
				if (ctx->params->upload) {
					glNamedBufferSubData(cs->shared_ubo, 0, sizeof(*bp), bp);
					ctx->params->upload = 0;
				}

				PartialComputeCtx *pc = &ctx->partial_compute_ctx;
				pc->runtime      = 0;
				pc->timer_active = 1;
				glQueryCounter(pc->timer_ids[0], GL_TIMESTAMP);
				glDeleteBuffers(1, &pc->rf_data_ssbo);
				glCreateBuffers(1, &pc->rf_data_ssbo);
				glNamedBufferStorage(pc->rf_data_ssbo, decoded_data_size(cs), 0, 0);
				LABEL_GL_OBJECT(GL_BUFFER, pc->rf_data_ssbo, s8("Volume_RF_SSBO"));

				/* TODO: maybe we should have some concept of compute shader
				 * groups, then we could define a group that does the decoding
				 * and filtering and apply that group directly here. For now
				 * we will do this dumb thing */
				u32 stage_count = ctx->params->compute_stages_count;
				enum compute_shaders *stages = ctx->params->compute_stages;
				for (u32 i = 0; i < stage_count; i++) {
					if (stages[i] == CS_DAS) {
						ctx->partial_compute_ctx.shader = stages[i];
						break;
					}
					do_compute_shader(ctx, *a, frame,
					                  work->compute_ctx.raw_data_ssbo_index,
					                  stages[i]);
				}
				u32 output_ssbo = pc->rf_data_ssbo;
				u32 input_ssbo  = cs->rf_data_ssbos[cs->last_output_ssbo_index];
				size rf_size    = decoded_data_size(cs);
				glCopyNamedBufferSubData(input_ssbo, output_ssbo, 0, 0, rf_size);
			}

			b32 done = do_partial_compute_step(ctx, frame);
			if (!done) {
				BeamformWork *new;
				/* NOTE: this push must not fail */
				new = beamform_work_queue_push(ctx, a, BW_PARTIAL_COMPUTE);
				new->compute_ctx.first_pass    = 0;
				new->compute_ctx.frame         = frame;
				new->compute_ctx.export_handle = work->compute_ctx.export_handle;
			} else if (work->compute_ctx.export_handle != INVALID_FILE) {
				export_frame(ctx, work->compute_ctx.export_handle, frame);
				work->compute_ctx.export_handle = INVALID_FILE;
				/* NOTE: do not waste a bunch of GPU space holding onto the volume
				 * texture if it was just for export */
				glDeleteTextures(frame->dim.w, frame->textures);
				mem_clear(frame, 0, sizeof(*frame));
			}
		} break;
		case BW_FULL_COMPUTE:
		case BW_RECOMPUTE: {
			BeamformFrame *frame = work->compute_ctx.frame;

			if (ctx->params->upload) {
				glNamedBufferSubData(cs->shared_ubo, 0, sizeof(*bp), bp);
				ctx->params->upload = 0;
			}

			u32 stage_count = ctx->params->compute_stages_count;
			enum compute_shaders *stages = ctx->params->compute_stages;
			for (u32 i = 0; i < stage_count; i++)
				do_compute_shader(ctx, *a, frame, work->compute_ctx.raw_data_ssbo_index,
				                  stages[i]);

			if (work->compute_ctx.export_handle != INVALID_FILE) {
				export_frame(ctx, work->compute_ctx.export_handle, frame);
				work->compute_ctx.export_handle = INVALID_FILE;
			}

			ctx->flags |= GEN_MIPMAPS;
		} break;
		}

		work->next   = q->next_free;
		q->next_free = work;
		work = beamform_work_queue_pop(q);
	}

	if (q->did_compute_this_frame) {
		u32 tidx = ctx->csctx.timer_index;
		glDeleteSync(ctx->csctx.timer_fences[tidx]);
		ctx->csctx.timer_fences[tidx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		ctx->csctx.timer_index = (tidx + 1) % ARRAY_COUNT(ctx->csctx.timer_fences);
	}
}
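
/* NOTE: timer results are collected one frame late: glClientWaitSync() with a
 * zero timeout only polls the fence recorded after the previous frame's
 * compute work, and the GL_QUERY_RESULT reads below should not stall once
 * that fence has signaled. */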

static void
check_compute_timers(ComputeShaderCtx *cs, PartialComputeCtx *pc, BeamformerParametersFull *bp)
{
	/* NOTE: volume generation running timer */
	if (pc->timer_active) {
		u64 start_ns = 0, end_ns = 0;
		glGetQueryObjectui64v(pc->timer_ids[0], GL_QUERY_RESULT, &start_ns);
		glGetQueryObjectui64v(pc->timer_ids[1], GL_QUERY_RESULT, &end_ns);
		u64 elapsed_ns   = end_ns - start_ns;
		pc->runtime     += (f32)elapsed_ns * 1e-9;
		pc->timer_active = 0;
	}

	/* NOTE: main timers for display portion of the program; add the count
	 * before subtracting so the unsigned index can't wrap */
	u32 last_idx = (cs->timer_index + ARRAY_COUNT(cs->timer_fences) - 1)
	               % ARRAY_COUNT(cs->timer_fences);
	if (!cs->timer_fences[last_idx])
		return;

	i32 status = glClientWaitSync(cs->timer_fences[last_idx], 0, 0);
	if (status == GL_TIMEOUT_EXPIRED || status == GL_WAIT_FAILED)
		return;
	glDeleteSync(cs->timer_fences[last_idx]);
	cs->timer_fences[last_idx] = NULL;

	for (u32 i = 0; i < bp->compute_stages_count; i++) {
		u64 ns  = 0;
		i32 idx = bp->compute_stages[i];
		if (cs->timer_active[last_idx][idx]) {
			glGetQueryObjectui64v(cs->timer_ids[last_idx][idx], GL_QUERY_RESULT, &ns);
			cs->timer_active[last_idx][idx] = 0;
		}
		cs->last_frame_time[idx] = (f32)ns / 1e9;
	}
}

#include "ui.c"

DEBUG_EXPORT BEAMFORMER_FRAME_STEP_FN(beamformer_frame_step)
{
	dt_for_frame = GetFrameTime();

	cycle_t += dt_for_frame;
	if (cycle_t > 1) cycle_t -= 1;
	glProgramUniform1f(ctx->csctx.programs[CS_DAS], ctx->csctx.cycle_t_id, cycle_t);

	if (IsWindowResized()) {
		ctx->window_size.h = GetScreenHeight();
		ctx->window_size.w = GetScreenWidth();
	}

	if (input->executable_reloaded) {
		ui_init(ctx, ctx->ui_backing_store);
	}

	/* NOTE: Store the compute time for the last frame. */
	check_compute_timers(&ctx->csctx, &ctx->partial_compute_ctx, ctx->params);

	BeamformerParameters *bp = &ctx->params->raw;
	/* NOTE: Check for and Load RF Data into GPU */
	if (input->pipe_data_available) {
		BeamformWork *work = beamform_work_queue_push(ctx, arena, BW_FULL_COMPUTE);
		/* NOTE: we can only read in the new data if we get back a work item;
		 * otherwise we have too many frames in flight and should wait until the
		 * next frame to try again */
		if (work) {
			ComputeShaderCtx *cs = &ctx->csctx;
			if (!uv4_equal(cs->dec_data_dim, bp->dec_data_dim)) {
				alloc_shader_storage(ctx, *arena);
				/* TODO: we may need to invalidate all queue items here */
			}

			if (ctx->params->export_next_frame) {
				/* TODO: we don't really want the beamformer opening/closing files */
				iptr f = ctx->platform.open_for_write(ctx->params->export_pipe_name);
				work->compute_ctx.export_handle = f;
				ctx->params->export_next_frame  = 0;
			} else {
				work->compute_ctx.export_handle = INVALID_FILE;
			}

			b32 output_3d = bp->output_points.x > 1 && bp->output_points.y > 1 &&
			                bp->output_points.z > 1;

			if (output_3d) {
				work->type = BW_PARTIAL_COMPUTE;
				BeamformFrame *frame = &ctx->partial_compute_ctx.frame;
				uv4 out_dim = ctx->params->raw.output_points;
				out_dim.w   = ctx->params->raw.xdc_count;
				alloc_beamform_frame(&ctx->gl, frame, out_dim, 0, s8("Beamformed_Volume"));
				work->compute_ctx.frame = frame;
			}

			u32  raw_index    = work->compute_ctx.raw_data_ssbo_index;
			uv2  rf_raw_dim   = cs->rf_raw_dim;
			size rf_raw_size  = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
			void *rf_data_buf = cs->raw_data_arena.beg + raw_index * rf_raw_size;

			alloc_output_image(ctx, bp->output_points);

			size rlen = ctx->platform.read_pipe(input->pipe_handle, rf_data_buf, rf_raw_size);
			if (rlen != rf_raw_size) {
				stream_append_s8(&ctx->error_stream, s8("Partial Read Occurred: "));
				stream_append_i64(&ctx->error_stream, rlen);
				stream_append_byte(&ctx->error_stream, '/');
				stream_append_i64(&ctx->error_stream, rf_raw_size);
				stream_append_s8(&ctx->error_stream, s8("\n\0"));
				TraceLog(LOG_WARNING, (c8 *)stream_to_s8(&ctx->error_stream).data);
				ctx->error_stream.widx = 0;
			} else {
				switch (ctx->gl.vendor_id) {
				case GL_VENDOR_AMD:
				case GL_VENDOR_ARM:
				case GL_VENDOR_INTEL:
					/* NOTE: the persistently mapped buffer was written directly */
					break;
				case GL_VENDOR_NVIDIA:
					glNamedBufferSubData(cs->raw_data_ssbo, raw_index * rlen,
					                     rlen, rf_data_buf);
					break;
				}
			}
		}
	}

	ctx->beamform_work_queue.did_compute_this_frame = 0;
	do_beamform_work(ctx, arena);
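
	/* NOTE: pick the texture to display: the running average when frame
	 * averaging is enabled (output_points.w > 1), otherwise the most recently
	 * beamformed frame; with multiple transducers, textures[dim.w - 1] holds
	 * the sum of the per-transducer volumes. */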

	/* NOTE: draw output image texture using render fragment shader */
	BeginTextureMode(ctx->fsctx.output);
	ClearBackground(PINK);
	BeginShaderMode(ctx->fsctx.shader);
	FragmentShaderCtx *fs = &ctx->fsctx;
	glUseProgram(fs->shader.id);
	u32 out_texture = 0;
	if (bp->output_points.w > 1) {
		out_texture = ctx->averaged_frame.textures[0];
	} else {
		BeamformFrame *f = ctx->beamform_frames + ctx->displayed_frame_index;
		/* NOTE: verify we have actually beamformed something yet */
		if (f->dim.w) out_texture = f->textures[f->dim.w - 1];
	}
	glBindTextureUnit(0, out_texture);
	glUniform1f(fs->db_cutoff_id, fs->db);
	glUniform1f(fs->threshold_id, fs->threshold);
	DrawTexture(fs->output.texture, 0, 0, WHITE);
	EndShaderMode();
	EndTextureMode();

	/* NOTE: regenerate mipmaps only when the output has actually changed */
	if (ctx->flags & GEN_MIPMAPS) {
		/* NOTE: shut up raylib's reporting on mipmap gen */
		SetTraceLogLevel(LOG_NONE);
		GenTextureMipmaps(&ctx->fsctx.output.texture);
		SetTraceLogLevel(LOG_INFO);
		ctx->flags &= ~GEN_MIPMAPS;
	}

	draw_ui(ctx, input);

	if (IsKeyPressed(KEY_R)) {
		ctx->flags |= RELOAD_SHADERS;
		if (ui_can_start_compute(ctx))
			ui_start_compute(ctx);
	}
	if (WindowShouldClose())
		ctx->flags |= SHOULD_EXIT;
}