Commit: b6bd87b30496428a650e027307ee11d0d7e6e332
Parent: 6c7f2e251664fa9a7991bf709392838e3d259b54
Author: Randy Palamar
Date: Fri, 25 Jul 2025 13:30:53 -0600
core: treat data as IQ when demodulating
All the alternate sampling modes work if you just pretend samples
are IQ pairs and rotate the Q sample back by 90 degrees.
Diffstat:
6 files changed, 183 insertions(+), 120 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -1,5 +1,11 @@
/* See LICENSE for license details. */
/* TODO(rnp):
+ * [ ]: make decode output real values for real inputs and complex values for complex inputs
+ * - this means that das should have a RF version and an IQ version
+ * - this will also flip the current hack to support demodulate after decode to
+ * being a hack to support CudaHilbert after decode
+ * [ ]: measure filtering performance when indexing forward on RF data
+ * [ ]: filter sampling frequency should be a filter creation parameter
* [ ]: reinvestigate ring buffer raw_data_ssbo
* - to minimize latency the main thread should manage the subbuffer upload so that the
* compute thread can just keep computing. This way we can keep the copmute thread busy
@@ -71,7 +77,7 @@ beamformer_filter_time_offset(BeamformerFilter *f)
f32 result = 0;
switch (f->kind) {
case BeamformerFilterKind_Kaiser:{
- result = -(f32)f->length / 2.0f / f->sampling_frequency;
+ result = (f32)f->length / 2.0f / f->sampling_frequency;
}break;
InvalidDefaultCase;
}
@@ -313,7 +319,6 @@ plan_compute_pipeline(SharedMemoryRegion *os_sm, BeamformerComputePipeline *cp,
os_shared_memory_region_lock(os_sm, sm->locks, compute_lock, (u32)-1);
b32 decode_first = sm->shaders[0] == BeamformerShaderKind_Decode;
- b32 demod_first = sm->shaders[0] == BeamformerShaderKind_Demodulate;
b32 cuda_hilbert = 0;
b32 demodulate = 0;
@@ -347,7 +352,10 @@ plan_compute_pipeline(SharedMemoryRegion *os_sm, BeamformerComputePipeline *cp,
[BeamformerDataKind_Float32] = BeamformerShaderKind_DecodeFloat,
[BeamformerDataKind_Float32Complex] = BeamformerShaderKind_DecodeFloatComplex,
};
- if (decode_first) {
+ if (decode_first && demodulate) {
+ /* TODO(rnp): for now we assume that if we are demodulating the data is int16 */
+ shader = BeamformerShaderKind_DecodeInt16ToFloat;
+ } else if (decode_first) {
shader = decode_table[CLAMP(data_kind, 0, countof(decode_table) - 1)];
} else {
if (data_kind == BeamformerDataKind_Int16)
@@ -358,7 +366,7 @@ plan_compute_pipeline(SharedMemoryRegion *os_sm, BeamformerComputePipeline *cp,
commit = 1;
}break;
case BeamformerShaderKind_Demodulate:{
- if (!demod_first || (demod_first && data_kind == BeamformerDataKind_Float32))
+ if (decode_first || (!decode_first && data_kind == BeamformerDataKind_Float32))
shader = BeamformerShaderKind_DemodulateFloat;
bp->time_offset += beamformer_filter_time_offset(filters + sp->filter_slot);
commit = 1;
@@ -379,58 +387,89 @@ plan_compute_pipeline(SharedMemoryRegion *os_sm, BeamformerComputePipeline *cp,
}
os_shared_memory_region_unlock(os_sm, sm->locks, compute_lock);
+ u32 time_compression = 1;
+ if (demodulate) time_compression = 2;
+
+ if (!demodulate) bp->center_frequency = 0;
+ bp->decimation_rate = MAX(bp->decimation_rate, 1);
+
+ cp->decode_dispatch.x = (u32)ceil_f32((f32)bp->dec_data_dim[0] / DECODE_LOCAL_SIZE_X);
+ cp->decode_dispatch.y = (u32)ceil_f32((f32)bp->dec_data_dim[1] / DECODE_LOCAL_SIZE_Y);
+ cp->decode_dispatch.z = (u32)ceil_f32((f32)bp->dec_data_dim[2] / DECODE_LOCAL_SIZE_Z);
+
+ /* NOTE(rnp): decode 2 samples per dispatch when data is i16 */
+ if (decode_first && cp->data_kind == BeamformerDataKind_Int16)
+ cp->decode_dispatch.x = (u32)ceil_f32((f32)cp->decode_dispatch.x / 2);
+
BeamformerDecodeUBO *dp = &cp->decode_ubo_data;
dp->decode_mode = bp->decode;
dp->transmit_count = bp->dec_data_dim[2];
- bp->decimation_rate = MAX(bp->decimation_rate, 1);
-
if (decode_first) {
dp->input_channel_stride = bp->rf_raw_dim[0];
dp->input_sample_stride = 1;
dp->input_transmit_stride = bp->dec_data_dim[0];
- dp->output_channel_stride = bp->dec_data_dim[0] * bp->dec_data_dim[2];
+ dp->output_channel_stride = bp->dec_data_dim[0] * bp->dec_data_dim[2] / time_compression;
+ dp->output_sample_stride = 1;
+ dp->output_transmit_stride = bp->dec_data_dim[0] / time_compression;
+ } else {
+ dp->input_channel_stride = bp->dec_data_dim[0] * bp->dec_data_dim[2] /
+ bp->decimation_rate / time_compression;
+ dp->input_sample_stride = bp->dec_data_dim[2];
+ dp->input_transmit_stride = 1;
+
+ dp->output_channel_stride = dp->input_channel_stride;
dp->output_sample_stride = 1;
- dp->output_transmit_stride = bp->dec_data_dim[0];
+ dp->output_transmit_stride = bp->dec_data_dim[0] / bp->decimation_rate / time_compression;
}
+ /* NOTE(rnp): when we are demodulating we pretend that the sampler was alternating
+ * between sampling the I portion and the Q portion of an IQ signal. Therefore there
+ * is an implicit decimation factor of 2 which must always be included. All code here
+ * assumes that the signal was sampled in such a way that supports this operation.
+ * To recover IQ[n] from the sampled data (RF[n]) we do the following:
+ * I[n] = RF[n]
+ * Q[n] = RF[n + 1]
+ * IQ[n] = I[n] - j*Q[n]
+ */
if (demodulate) {
BeamformerDemodulateUBO *mp = &cp->demod_ubo_data;
- mp->sampling_frequency = bp->sampling_frequency;
mp->demodulation_frequency = bp->center_frequency;
+ mp->sampling_frequency = bp->sampling_frequency / (f32)time_compression;
mp->decimation_rate = bp->decimation_rate;
+ mp->map_channels = !decode_first;
- bp->sampling_frequency /= (f32)mp->decimation_rate;
- bp->dec_data_dim[0] /= mp->decimation_rate;
+ if (decode_first) {
+ mp->input_channel_stride = dp->output_channel_stride;
+ mp->input_sample_stride = dp->output_sample_stride;
+ mp->input_transmit_stride = dp->output_transmit_stride;
- mp->input_sample_stride = 1;
- mp->input_transmit_stride = bp->dec_data_dim[0] * mp->decimation_rate;
- mp->output_channel_stride = bp->dec_data_dim[0] * bp->dec_data_dim[2];
-
- if (demod_first) {
- /* NOTE(rnp): output optimized decode layout to skip first pass */
- mp->input_channel_stride = bp->rf_raw_dim[0];
- mp->output_sample_stride = bp->dec_data_dim[2];
- mp->output_transmit_stride = 1;
- mp->map_channels = 1;
+ mp->output_channel_stride = bp->dec_data_dim[0] * bp->dec_data_dim[2] /
+ mp->decimation_rate / time_compression;
+ mp->output_sample_stride = 1;
+ mp->output_transmit_stride = bp->dec_data_dim[0] / mp->decimation_rate / time_compression;
+ } else {
+ mp->input_channel_stride = bp->rf_raw_dim[0] / time_compression;
+ mp->input_sample_stride = 1;
+ mp->input_transmit_stride = bp->dec_data_dim[0] / time_compression;
- dp->input_channel_stride = mp->output_channel_stride;
- dp->input_sample_stride = mp->output_sample_stride;
- dp->input_transmit_stride = mp->output_transmit_stride;
+ /* NOTE(rnp): output optimized layout for decoding */
+ mp->output_channel_stride = dp->input_channel_stride;
+ mp->output_sample_stride = dp->input_sample_stride;
+ mp->output_transmit_stride = dp->input_transmit_stride;
- dp->output_channel_stride = bp->dec_data_dim[0] * bp->dec_data_dim[2];
- dp->output_sample_stride = 1;
- dp->output_transmit_stride = bp->dec_data_dim[0];
- } else {
- mp->input_channel_stride = dp->output_channel_stride;
- mp->output_sample_stride = 1;
- mp->output_transmit_stride = bp->dec_data_dim[0];
- mp->map_channels = 0;
+ u32 time_samples = bp->dec_data_dim[0] / mp->decimation_rate / time_compression;
+ cp->decode_dispatch.x = (u32)ceil_f32((f32)time_samples / DECODE_LOCAL_SIZE_X);
}
- } else {
- bp->center_frequency = 0;
- bp->decimation_rate = 1;
+
+ f32 local_size_x = DEMOD_LOCAL_SIZE_X * (f32)time_compression * (f32)mp->decimation_rate;
+ cp->demod_dispatch.x = (u32)ceil_f32((f32)bp->dec_data_dim[0] / local_size_x);
+ cp->demod_dispatch.y = (u32)ceil_f32((f32)bp->dec_data_dim[1] / DEMOD_LOCAL_SIZE_Y);
+ cp->demod_dispatch.z = (u32)ceil_f32((f32)bp->dec_data_dim[2] / DEMOD_LOCAL_SIZE_Z);
+
+ bp->sampling_frequency /= (f32)mp->decimation_rate * (f32)time_compression;
+ bp->dec_data_dim[0] /= mp->decimation_rate * time_compression;
}
}
@@ -489,40 +528,32 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformerComputeFrame *frame
case BeamformerShaderKind_DecodeInt16Complex:
case BeamformerShaderKind_DecodeFloat:
case BeamformerShaderKind_DecodeFloatComplex:
+ case BeamformerShaderKind_DecodeInt16ToFloat:
{
glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[BeamformerComputeUBOKind_Decode]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->rf_data_ssbos[output_ssbo_idx]);
glBindImageTexture(0, csctx->hadamard_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8I);
- /* NOTE(rnp): decode 2 samples per dispatch when data is i16 */
- f32 local_size_x = (f32)DECODE_LOCAL_SIZE_X;
- if (shader == BeamformerShaderKind_Decode)
- local_size_x *= 2;
-
- uv3 dim = csctx->dec_data_dim.xyz;
iz raw_size = csctx->rf_raw_size;
if (shader == cp->shaders[0]) {
glBindImageTexture(1, csctx->channel_mapping_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16I);
glProgramUniform1ui(program, DECODE_FIRST_PASS_UNIFORM_LOC, 1);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo, 0, raw_size);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, csctx->raw_data_ssbo, raw_size, raw_size);
- glDispatchCompute((u32)ceil_f32((f32)dim.x / local_size_x),
- (u32)ceil_f32((f32)dim.y / DECODE_LOCAL_SIZE_Y),
- (u32)ceil_f32((f32)dim.z / DECODE_LOCAL_SIZE_Z));
+ glDispatchCompute(cp->decode_dispatch.x, cp->decode_dispatch.y, cp->decode_dispatch.z);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
if (shader == cp->shaders[0]) {
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo, raw_size, raw_size);
} else {
- dim = uv4_from_u32_array(cp->das_ubo_data.dec_data_dim).xyz;
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
}
glProgramUniform1ui(program, DECODE_FIRST_PASS_UNIFORM_LOC, 0);
- glDispatchCompute((u32)ceil_f32((f32)dim.x / local_size_x),
- (u32)ceil_f32((f32)dim.y / DECODE_LOCAL_SIZE_Y),
- (u32)ceil_f32((f32)dim.z / DECODE_LOCAL_SIZE_Z));
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->rf_data_ssbos[output_ssbo_idx]);
+
+ glDispatchCompute(cp->decode_dispatch.x, cp->decode_dispatch.y, cp->decode_dispatch.z);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
@@ -551,10 +582,7 @@ do_compute_shader(BeamformerCtx *ctx, Arena arena, BeamformerComputeFrame *frame
if (ubo->map_channels)
glBindImageTexture(1, csctx->channel_mapping_texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16I);
- f32 local_size_x = (f32)(DEMOD_LOCAL_SIZE_X * (f32)ubo->decimation_rate);
- glDispatchCompute((u32)ceil_f32((f32)csctx->dec_data_dim.x / local_size_x),
- (u32)ceil_f32((f32)csctx->dec_data_dim.y / DEMOD_LOCAL_SIZE_Y),
- (u32)ceil_f32((f32)csctx->dec_data_dim.z / DEMOD_LOCAL_SIZE_Z));
+ glDispatchCompute(cp->demod_dispatch.x, cp->demod_dispatch.y, cp->demod_dispatch.z);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
@@ -726,24 +754,19 @@ shader_text_with_header(ShaderReloadContext *ctx, OS *os, Arena *arena)
#undef X
}break;
case BeamformerShaderKind_Decode:
- case BeamformerShaderKind_DecodeInt16Complex:
case BeamformerShaderKind_DecodeFloat:
case BeamformerShaderKind_DecodeFloatComplex:
+ case BeamformerShaderKind_DecodeInt16Complex:
+ case BeamformerShaderKind_DecodeInt16ToFloat:
{
- switch (ctx->kind) {
- case BeamformerShaderKind_DecodeInt16Complex:{
- stream_append_s8(&sb, s8("#define INPUT_DATA_TYPE_INT16_COMPLEX\n\n"));
- }break;
- case BeamformerShaderKind_DecodeFloat:{
- stream_append_s8(&sb, s8("#define INPUT_DATA_TYPE_FLOAT\n\n"));
- }break;
- case BeamformerShaderKind_DecodeFloatComplex:{
- stream_append_s8(&sb, s8("#define INPUT_DATA_TYPE_FLOAT_COMPLEX\n\n"));
- }break;
- default:{}break;
- }
+ s8 define_table[] = {
+ [BeamformerShaderKind_DecodeFloatComplex] = s8("#define INPUT_DATA_TYPE_FLOAT_COMPLEX\n\n"),
+ [BeamformerShaderKind_DecodeFloat] = s8("#define INPUT_DATA_TYPE_FLOAT\n\n"),
+ [BeamformerShaderKind_DecodeInt16Complex] = s8("#define INPUT_DATA_TYPE_INT16_COMPLEX\n\n"),
+ [BeamformerShaderKind_DecodeInt16ToFloat] = s8("#define OUTPUT_DATA_TYPE_FLOAT\n\n"),
+ };
#define X(type, id, pretty) "#define DECODE_MODE_" #type " " #id "\n"
- stream_append_s8(&sb, s8(""
+ stream_append_s8s(&sb, define_table[ctx->kind], s8(""
"layout(local_size_x = " str(DECODE_LOCAL_SIZE_X) ", "
"local_size_y = " str(DECODE_LOCAL_SIZE_Y) ", "
"local_size_z = " str(DECODE_LOCAL_SIZE_Z) ") in;\n\n"
@@ -847,6 +870,10 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
src->shader = cs->programs + src->kind;
success &= reload_compute_shader(ctx, src, s8(" (I16C)"), arena);
+ src->kind = BeamformerShaderKind_DecodeInt16ToFloat;
+ src->shader = cs->programs + src->kind;
+ success &= reload_compute_shader(ctx, src, s8(" (I16-F32)"), arena);
+
src->kind = BeamformerShaderKind_Decode;
src->shader = cs->programs + src->kind;
}break;
@@ -899,7 +926,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
}break;
case BeamformerWorkKind_CreateFilter:{
BeamformerCreateFilterContext *fctx = &work->create_filter_context;
- beamformer_filter_update(cs->filters + fctx->slot, fctx, sm->parameters.sampling_frequency, arena);
+ beamformer_filter_update(cs->filters + fctx->slot, fctx, sm->parameters.sampling_frequency / 2, arena);
}break;
case BeamformerWorkKind_UploadBuffer:{
os_shared_memory_region_lock(&ctx->shared_memory, sm->locks, (i32)work->lock, (u32)-1);
@@ -962,6 +989,10 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena arena, iptr gl_co
DEBUG_DECL(work->kind = BeamformerWorkKind_ComputeIndirect;)
} /* FALLTHROUGH */
case BeamformerWorkKind_Compute:{
+ DEBUG_DECL(glClearNamedBufferData(cs->rf_data_ssbos[0], GL_RG32F, GL_RG, GL_FLOAT, 0);)
+ DEBUG_DECL(glClearNamedBufferData(cs->rf_data_ssbos[1], GL_RG32F, GL_RG, GL_FLOAT, 0);)
+ DEBUG_DECL(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);)
+
push_compute_timing_info(ctx->compute_timing_table,
(ComputeTimingInfo){.kind = ComputeTimingInfoKind_ComputeFrameBegin});
diff --git a/beamformer.h b/beamformer.h
@@ -162,6 +162,9 @@ typedef struct {
i32 shader_count;
BeamformerDataKind data_kind;
+ uv3 decode_dispatch;
+ uv3 demod_dispatch;
+
u32 ubos[BeamformerComputeUBOKind_Count];
#define X(k, type, name) type name ##_ubo_data;
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -25,8 +25,9 @@
X(DecodeInt16Complex, 7, "", "Decode (I16C)") \
X(DecodeFloat, 8, "", "Decode (F32)") \
X(DecodeFloatComplex, 9, "", "Decode (F32C)") \
- X(DemodulateFloat, 10, "", "Demodulate (F32)") \
- X(DASFast, 11, "", "DAS (Fast)")
+ X(DecodeInt16ToFloat, 10, "", "Decode (I16-F32)") \
+ X(DemodulateFloat, 11, "", "Demodulate (F32)") \
+ X(DASFast, 12, "", "DAS (Fast)")
typedef enum {
#define X(e, n, ...) BeamformerShaderKind_##e = n,
diff --git a/opengl.h b/opengl.h
@@ -77,6 +77,7 @@ typedef uint64_t GLuint64;
X(glBindTextureUnit, void, (GLuint unit, GLuint texture)) \
X(glBindVertexArray, void, (GLuint array)) \
X(glBlitNamedFramebuffer, void, (GLuint sfb, GLuint dfb, GLint sx0, GLint sy0, GLint sx1, GLint sy1, GLint dx0, GLint dy0, GLint dx1, GLint dy1, GLbitfield mask, GLenum filter)) \
+ X(glClearNamedBufferData, void, (GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void *data)) \
X(glClearNamedFramebufferfv, void, (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat *value)) \
X(glClearTexImage, void, (GLuint texture, GLint level, GLenum format, GLenum type, const void *data)) \
X(glCompileShader, void, (GLuint shader)) \
diff --git a/shaders/decode.glsl b/shaders/decode.glsl
@@ -28,11 +28,15 @@
#define SAMPLE_TYPE_CAST(x) vec2(((x) << 16) >> 16, (x) >> 16)
#else
#define INPUT_DATA_TYPE int
- #define RF_SAMPLES_PER_INDEX 2
#define RESULT_TYPE_CAST(x) (x)
- #define SAMPLE_DATA_TYPE vec4
/* NOTE(rnp): for i16 rf_data we decode 2 samples at once */
- #define SAMPLE_TYPE_CAST(x) vec4(((x) << 16) >> 16, 0, (x) >> 16, 0)
+ #define RF_SAMPLES_PER_INDEX 2
+ #define SAMPLE_DATA_TYPE vec4
+ #if defined(OUTPUT_DATA_TYPE_FLOAT)
+ #define SAMPLE_TYPE_CAST(x) vec4(((x) << 16) >> 16, (x) >> 16, 0, 0)
+ #else
+ #define SAMPLE_TYPE_CAST(x) vec4(((x) << 16) >> 16, 0, (x) >> 16, 0)
+ #endif
#endif
layout(std430, binding = 1) readonly restrict buffer buffer_1 {
@@ -64,30 +68,39 @@ void main()
uint rf_offset = (input_channel_stride * channel + transmit_count * time_sample) / RF_SAMPLES_PER_INDEX;
if (u_first_pass) {
- uint in_off = input_channel_stride * imageLoad(channel_mapping, int(channel)).x +
- input_transmit_stride * transmit +
- input_sample_stride * time_sample;
- out_rf_data[rf_offset + transmit] = rf_data[in_off / RF_SAMPLES_PER_INDEX];
+ if (time_sample < input_transmit_stride) {
+ uint in_off = input_channel_stride * imageLoad(channel_mapping, int(channel)).x +
+ input_transmit_stride * transmit +
+ input_sample_stride * time_sample;
+ out_rf_data[rf_offset + transmit] = rf_data[in_off / RF_SAMPLES_PER_INDEX];
+ }
} else {
- uint out_off = output_channel_stride * channel +
- output_transmit_stride * transmit +
- output_sample_stride * time_sample;
+ #if defined(OUTPUT_DATA_TYPE_FLOAT)
+ /* NOTE(rnp): when outputting floats do not dilate the out time sample;
+ * output should end up densely packed */
+ time_sample = gl_GlobalInvocationID.x;
+ #endif
+ if (time_sample < output_transmit_stride) {
+ uint out_off = output_channel_stride * channel +
+ output_transmit_stride * transmit +
+ output_sample_stride * time_sample;
- vec4 result = vec4(0);
- switch (decode_mode) {
- case DECODE_MODE_NONE: {
- result = RESULT_TYPE_CAST(sample_rf_data(rf_offset + transmit));
- } break;
- case DECODE_MODE_HADAMARD: {
- SAMPLE_DATA_TYPE sum = SAMPLE_DATA_TYPE(0);
- for (int i = 0; i < transmit_count; i++)
- sum += imageLoad(hadamard, ivec2(i, transmit)).x * sample_rf_data(rf_offset++);
- result = RESULT_TYPE_CAST(sum) / float(transmit_count);
- } break;
+ vec4 result = vec4(0);
+ switch (decode_mode) {
+ case DECODE_MODE_NONE: {
+ result = RESULT_TYPE_CAST(sample_rf_data(rf_offset + transmit));
+ } break;
+ case DECODE_MODE_HADAMARD: {
+ SAMPLE_DATA_TYPE sum = SAMPLE_DATA_TYPE(0);
+ for (int i = 0; i < imageSize(hadamard).x; i++)
+ sum += imageLoad(hadamard, ivec2(i, transmit)).x * sample_rf_data(rf_offset++);
+ result = RESULT_TYPE_CAST(sum) / float(imageSize(hadamard).x);
+ } break;
+ }
+ out_data[out_off + 0] = result.xy;
+ #if RF_SAMPLES_PER_INDEX == 2 && !defined(OUTPUT_DATA_TYPE_FLOAT)
+ out_data[out_off + 1] = result.zw;
+ #endif
}
- out_data[out_off + 0] = result.xy;
- #if RF_SAMPLES_PER_INDEX == 2
- out_data[out_off + 1] = result.zw;
- #endif
}
}
diff --git a/shaders/demod.glsl b/shaders/demod.glsl
@@ -1,39 +1,44 @@
/* See LICENSE for license details. */
#if defined(INPUT_DATA_TYPE_FLOAT)
- #define INPUT_DATA_TYPE vec2
- #define RF_SAMPLES_PER_INDEX 1
- #define RESULT_TYPE_CAST(x) (x)
- #define SAMPLE_TYPE_CAST(v, i) (v).x
+ #define DATA_TYPE vec2
+ #define RESULT_TYPE_CAST(v) (v)
+ #define SAMPLE_TYPE_CAST(v) (v)
#else
- #define INPUT_DATA_TYPE uint
- #define RF_SAMPLES_PER_INDEX 2
- #define RESULT_TYPE_CAST(v) packSnorm2x16(v)
- #define SAMPLE_TYPE_CAST(v, i) unpackSnorm2x16(v)[(~(i)) & 1u]
- /* NOTE(rnp): for outputting complex floats */
- //#define RESULT_TYPE_CAST(v) (v)
- //#define SAMPLE_TYPE_CAST(v, i) unpackSnorm2x16(v)[(~(i)) & 1u] * 32767.0f
+ #define DATA_TYPE uint
+ #define RESULT_TYPE_CAST(v) packSnorm2x16(v)
+ #define SAMPLE_TYPE_CAST(v) unpackSnorm2x16(v)
#endif
layout(std430, binding = 1) readonly restrict buffer buffer_1 {
- INPUT_DATA_TYPE in_data[];
+ DATA_TYPE in_data[];
};
layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
- INPUT_DATA_TYPE out_data[];
+ DATA_TYPE out_data[];
};
layout(r32f, binding = 0) readonly restrict uniform image1D filter_coefficients;
layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping;
-float sample_rf(uint index)
+vec2 rotate_iq(vec2 iq, uint index)
{
- float result = SAMPLE_TYPE_CAST(in_data[index], index);
+ float arg = radians(360) * demodulation_frequency * index / sampling_frequency;
+ mat2 phasor = mat2( cos(arg), sin(arg),
+ -sin(arg), cos(arg));
+ vec2 result = phasor * iq;
+ return result;
+}
+
+vec2 sample_rf(uint index)
+{
+ vec2 result = SAMPLE_TYPE_CAST(in_data[index]);
return result;
}
void main()
{
uint in_sample = gl_GlobalInvocationID.x * decimation_rate;
+ uint out_sample = gl_GlobalInvocationID.x;
uint channel = gl_GlobalInvocationID.y;
uint transmit = gl_GlobalInvocationID.z;
@@ -41,17 +46,26 @@ void main()
uint in_offset = input_channel_stride * in_channel + input_transmit_stride * transmit;
uint out_offset = output_channel_stride * channel +
output_transmit_stride * transmit +
- output_sample_stride * gl_GlobalInvocationID.x;
-
- float arg = radians(360) * demodulation_frequency / sampling_frequency;
- vec2 result = vec2(0);
- for (int i = 0; i < imageSize(filter_coefficients).x; i++) {
- uint index = in_sample + i;
- if (index < input_transmit_stride) {
- float data = sample_rf((in_offset + index) / RF_SAMPLES_PER_INDEX);
- vec2 iq = sqrt(2.0f) * data * vec2(cos(arg * index), -sin(arg * index));
- result += imageLoad(filter_coefficients, imageSize(filter_coefficients).x - i - 1).x * iq;
+ output_sample_stride * out_sample;
+
+ bool in_bounds;
+ if (map_channels) {
+ in_bounds = out_sample < output_channel_stride / output_sample_stride;
+ } else {
+ in_bounds = out_sample < output_transmit_stride;
+ }
+
+ if (in_bounds) {
+ vec2 result = vec2(0);
+ int index = int(in_sample);
+ for (int i = 0;
+ i < imageSize(filter_coefficients).x && index >= 0;
+ i++, index -= int(input_sample_stride))
+ {
+ vec2 data = sample_rf(in_offset + index) * vec2(1, -1);
+ vec2 iq = sqrt(2.0f) * rotate_iq(data, index);
+ result += imageLoad(filter_coefficients, i).x * iq;
}
+ out_data[out_offset] = RESULT_TYPE_CAST(result);
}
- out_data[out_offset] = RESULT_TYPE_CAST(result);
}