Commit: 45709ce11f11f5ff323b5e3a79003e556134d82b
Parent: 0a38419f823a9389dc6383df994de19526d86c1b
Author: Randy Palamar
Date: Wed, 23 Apr 2025 09:28:46 -0600
core: allow hadamard allocation to fail
If the caller requests an unsupported (non-Hadamard) size, or if the
number of elements exceeds the available space in the arena, return nothing.
Diffstat:
M | beamformer.c | | | 20 | +++++++++----------- |
M | util.c | | | 71 | ++++++++++++++++++++++++++++++++++++++++++----------------------------- |
2 files changed, 51 insertions(+), 40 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -143,17 +143,15 @@ alloc_shader_storage(BeamformerCtx *ctx, u32 rf_raw_size, Arena a)
ctx->cuda_lib.init_cuda_configuration(bp->rf_raw_dim.E, bp->dec_data_dim.E,
ctx->shared_memory->channel_mapping);
- /* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */
- iz hadamard_elements = dec_data_dim.z * dec_data_dim.z;
- i32 *hadamard = alloc(&a, i32, hadamard_elements);
- i32 *tmp = alloc(&a, i32, hadamard_elements);
- fill_hadamard_transpose(hadamard, tmp, dec_data_dim.z);
- glDeleteTextures(1, &cs->hadamard_texture);
- glCreateTextures(GL_TEXTURE_2D, 1, &cs->hadamard_texture);
- glTextureStorage2D(cs->hadamard_texture, 1, GL_R8I, dec_data_dim.z, dec_data_dim.z);
- glTextureSubImage2D(cs->hadamard_texture, 0, 0, 0, dec_data_dim.z, dec_data_dim.z,
- GL_RED_INTEGER, GL_INT, hadamard);
- LABEL_GL_OBJECT(GL_TEXTURE, cs->hadamard_texture, s8("Hadamard_Matrix"));
+ i32 *hadamard = make_hadamard_transpose(&a, dec_data_dim.z);
+ if (hadamard) {
+ glDeleteTextures(1, &cs->hadamard_texture);
+ glCreateTextures(GL_TEXTURE_2D, 1, &cs->hadamard_texture);
+ glTextureStorage2D(cs->hadamard_texture, 1, GL_R8I, dec_data_dim.z, dec_data_dim.z);
+ glTextureSubImage2D(cs->hadamard_texture, 0, 0, 0, dec_data_dim.z, dec_data_dim.z,
+ GL_RED_INTEGER, GL_INT, hadamard);
+ LABEL_GL_OBJECT(GL_TEXTURE, cs->hadamard_texture, s8("Hadamard_Matrix"));
+ }
}
static b32
diff --git a/util.c b/util.c
@@ -72,6 +72,16 @@ alloc_(Arena *a, iz len, iz align, iz count)
return mem_clear(p, 0, count * len);
}
+#define arena_capacity(a, t) arena_capacity_(a, sizeof(t), _Alignof(t))
+function iz /* Number of whole `size`-byte elements that fit between a->beg and
+ * a->end once a->beg has been rounded up to `alignment`. Only reads the arena;
+ * the arena_capacity(a, t) macro above supplies sizeof/_Alignof for a type. */
+arena_capacity_(Arena *a, iz size, iz alignment)
+{
+ iz padding = -(uintptr_t)a->beg & (alignment - 1); /* bytes needed to align a->beg; the mask assumes a power-of-two alignment */
+ iz available = a->end - a->beg - padding; /* bytes left after the alignment padding */
+ iz result = available / size; /* integer division: partial elements are not counted */
+ return result;
+}
+
enum { DA_INITIAL_CAP = 8 };
#define da_reserve(a, s, n) \
(s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \
@@ -780,40 +790,43 @@ kronecker_product(i32 *out, i32 *a, uv2 a_dim, i32 *b, uv2 b_dim)
}
/* NOTE/TODO: to support even more hadamard sizes use the Paley construction */
-static void
-fill_hadamard_transpose(i32 *out, i32 *tmp, u32 dim)
+function i32 * /* Returns a matrix of dim * dim i32 values allocated from `a`
+ * (per the name, a transposed Hadamard matrix), or 0 when dim is 0, when dim is
+ * neither a power of 2 nor a multiple of 12, or when the arena cannot hold the
+ * matrix (plus an equally sized scratch matrix in the multiple-of-12 case). */
+make_hadamard_transpose(Arena *a, u32 dim)
{
- ASSERT(dim);
+ i32 *result = 0;
+
b32 power_of_2 = ISPOWEROF2(dim);
b32 multiple_of_12 = dim % 12 == 0;
-
- if (!power_of_2 && !multiple_of_12)
- return;
-
- if (!power_of_2) {
- ASSERT(multiple_of_12);
- dim /= 12;
- }
-
- i32 *m;
- if (power_of_2) m = out;
- else m = tmp;
-
- #define IND(i, j) ((i) * dim + (j))
- m[0] = 1;
- for (u32 k = 1; k < dim; k *= 2) {
- for (u32 i = 0; i < k; i++) {
- for (u32 j = 0; j < k; j++) {
- i32 val = m[IND(i, j)];
- m[IND(i + k, j)] = val;
- m[IND(i, j + k)] = val;
- m[IND(i + k, j + k)] = -val;
+ iz elements = dim * dim; /* NOTE(review): the product is evaluated in u32 before widening to iz and may wrap for very large dim - confirm the expected range */
+
+ if (dim && (power_of_2 || multiple_of_12) &&
+ arena_capacity(a, i32) >= elements * (1 + multiple_of_12)) /* multiple_of_12 doubles the requirement to cover the scratch matrix */
+ {
+ if (!power_of_2) dim /= 12; /* the base matrix is built at dim / 12 and expanded by the 12x12 block below */
+ result = alloc(a, i32, elements); /* elements was computed from the original dim */
+
+ Arena tmp = *a; /* by-value copy: presumably keeps the scratch allocation out of the caller's arena */
+ i32 *m = power_of_2 ? result : alloc(&tmp, i32, elements); /* power of 2: fill result directly; otherwise fill scratch */
+
+ #define IND(i, j) ((i) * dim + (j))
+ m[0] = 1; /* 1x1 seed */
+ for (u32 k = 1; k < dim; k *= 2) { /* each pass grows the filled k x k block H into [[H, H], [H, -H]]. NOTE(review): if the reduced dim is not a power of 2 (e.g. original dim 36) the last pass indexes rows/columns >= dim - confirm callers never request such sizes */
+ for (u32 i = 0; i < k; i++) {
+ for (u32 j = 0; j < k; j++) {
+ i32 val = m[IND(i, j)];
+ m[IND(i + k, j)] = val;
+ m[IND(i, j + k)] = val;
+ m[IND(i + k, j + k)] = -val;
+ }
}
}
+ #undef IND
+
+ if (!power_of_2) { /* multiple-of-12 case: combine the base matrix with the fixed 12x12 table into result */
+ kronecker_product(result, m, (uv2){.x = dim, .y = dim},
+ hadamard_12_12_transpose, (uv2){.x = 12, .y = 12});
+ }
}
- #undef IND
- if (!power_of_2)
- kronecker_product(out, tmp, (uv2){.x = dim, .y = dim}, hadamard_12_12_transpose,
- (uv2){.x = 12, .y = 12});
+ return result; /* still 0 if the size or capacity check above failed */
}