Commit: 5e1ccca056d2cb33f9416e99f72e00dbe6ed8079
Parent: 0fad2570f0676f4b9663b8b134025676e01083a7
Author: Randy Palamar
Date: Wed, 18 Jun 2025 14:12:12 -0600
core: allow non power of 2 images to be created
This will result in a lot of wasted compute and is not recommended,
but it keeps the result as expected from the library's perspective.
Before this the library would try to read the wrong amount of data
and lock up/timeout waiting for data that would never come.
For optimal compute usage images should be a power of 2 multiple
of DAS_LOCAL_SIZE in each dimension.
Diffstat:
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -82,9 +82,9 @@ function void
alloc_beamform_frame(GLParams *gp, BeamformFrame *out, ComputeShaderStats *out_stats,
uv3 out_dim, s8 name, Arena arena)
{
- out->dim.x = MAX(1, round_down_power_of_2(ORONE(out_dim.x)));
- out->dim.y = MAX(1, round_down_power_of_2(ORONE(out_dim.y)));
- out->dim.z = MAX(1, round_down_power_of_2(ORONE(out_dim.z)));
+ out->dim.x = MAX(1, out_dim.x);
+ out->dim.y = MAX(1, out_dim.y);
+ out->dim.z = MAX(1, out_dim.z);
if (gp) {
out->dim.x = MIN(out->dim.x, gp->max_3d_texture_dim);
@@ -95,7 +95,7 @@ alloc_beamform_frame(GLParams *gp, BeamformFrame *out, ComputeShaderStats *out_s
/* NOTE: allocate storage for beamformed output data;
* this is shared between compute and fragment shaders */
u32 max_dim = MAX(out->dim.x, MAX(out->dim.y, out->dim.z));
- out->mips = ctz_u32(max_dim) + 1;
+ out->mips = ctz_u32(round_up_power_of_2(max_dim)) + 1;
Stream label = arena_stream(arena);
stream_append_s8(&label, name);
diff --git a/util.c b/util.c
@@ -508,6 +508,13 @@ round_down_power_of_2(u32 a)
return result;
}
+/* NOTE: rounds a up to the nearest power of 2; values that are already a
+ * power of 2 are returned unchanged. 0 and 1 both give 1. Values above
+ * 0x80000000 have no representable u32 power of 2 above them and saturate
+ * to 0x80000000. */
+function u32
+round_up_power_of_2(u32 a)
+{
+	u32 result = 1;
+	if (a > 0x80000000U) {
+		/* NOTE: a - 1 has its top bit set here; the shift count below
+		 * would be negative, which is undefined */
+		result = 0x80000000U;
+	} else if (a > 1) {
+		/* NOTE: a - 1 is in [1, 0x7FFFFFFF] so the shift count is in [0, 30] */
+		result = 0x80000000U >> (clz_u32(a - 1) - 1);
+	}
+	/* NOTE: a <= 1 is handled by the initializer: a == 0 would wrap a - 1 to
+	 * 0xFFFFFFFF (negative shift count) and a == 1 would evaluate clz_u32(0),
+	 * which may be undefined depending on how clz_u32 is implemented
+	 * (TODO confirm against its definition) */
+	return result;
+}
+
function iz
round_up_to(iz value, iz multiple)
{