ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: e19c367e308b99a78e9d33d755d96ce6dc882c2d
Parent: e15bb512d82df62df5cabf3502000d05480cef60
Author: Randy Palamar
Date:   Thu,  2 Oct 2025 21:34:08 -0600

meta: generate lists to help with shader flag baking

Rather than unpacking the baked flags manually in each shader, just
generate the shader code with them already unpacked in the header.

Diffstat:
M beamformer.c | 55 +++++++++++++++++++++++++++++++++----------------------
M beamformer.h | 2 ++
M beamformer.meta | 3 ---
M build.c | 27 ++++++++++++++++++++-------
M generated/beamformer.meta.c | 69 ++++++++++++++++++++++++++++++++++++---------------------------------
M shaders/das.glsl | 56 ++++++++++++++++++++++----------------------------------
M shaders/decode.glsl | 2 +-
M shaders/filter.glsl | 14 ++++++--------
8 files changed, 120 insertions(+), 108 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -443,7 +443,7 @@ das_voxel_transform_matrix(BeamformerParameters *bp) return result; } -function void +function u32 das_ubo_from_beamformer_parameters(BeamformerComputePlan *cp, BeamformerDASUBO *du, BeamformerParameters *bp) { du->voxel_transform = das_voxel_transform_matrix(bp); @@ -459,18 +459,20 @@ das_ubo_from_beamformer_parameters(BeamformerComputePlan *cp, BeamformerDASUBO * cp->das_bake.channel_count = bp->channel_count; cp->das_bake.acquisition_count = bp->acquisition_count; - cp->das_bake.shader_flags = 0; - if (bp->coherency_weighting) cp->das_bake.shader_flags |= BeamformerShaderDASFlags_CoherencyWeighting; - else cp->das_bake.shader_flags |= BeamformerShaderDASFlags_Fast; + u32 result = 0; + if (bp->coherency_weighting) result |= BeamformerShaderDASFlags_CoherencyWeighting; + else result |= BeamformerShaderDASFlags_Fast; if (bp->das_shader_id == BeamformerAcquisitionKind_UFORCES || bp->das_shader_id == BeamformerAcquisitionKind_UHERCULES) - cp->das_bake.shader_flags |= BeamformerShaderDASFlags_Sparse; + result |= BeamformerShaderDASFlags_Sparse; if (bp->das_shader_id == BeamformerAcquisitionKind_HERO_PA) - cp->das_bake.shader_flags |= BeamformerShaderDASFlags_ReceiveOnly; + result |= BeamformerShaderDASFlags_ReceiveOnly; if (bp->interpolate) - cp->das_bake.shader_flags |= BeamformerShaderDASFlags_Interpolate; + result |= BeamformerShaderDASFlags_Interpolate; + + return result; } function void @@ -478,7 +480,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) { BeamformerDASUBO *bp = &cp->das_ubo_data; - das_ubo_from_beamformer_parameters(cp, bp, &pb->parameters); + u32 das_flags = das_ubo_from_beamformer_parameters(cp, bp, &pb->parameters); b32 decode_first = pb->pipeline.shaders[0] == BeamformerShaderKind_Decode; b32 run_cuda_hilbert = 0; @@ -517,8 +519,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) } } - b->shader_flags = 0; - if 
(run_cuda_hilbert) b->shader_flags |= BeamformerShaderDecodeFlags_DilateOutput; + u32 *flags = cp->shader_flags + cp->pipeline.shader_count; + *flags = 0; + if (run_cuda_hilbert) *flags |= BeamformerShaderDecodeFlags_DilateOutput; commit = 1; }break; @@ -530,9 +533,10 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) b->filter_length = (u32)f->length; b->sampling_mode = pb->parameters.sampling_mode; - b->shader_flags = BeamformerShaderFilterFlags_Demodulate; - if (f->parameters.complex) b->shader_flags |= BeamformerShaderFilterFlags_ComplexFilter; - if (!decode_first) b->shader_flags |= BeamformerShaderFilterFlags_MapChannels; + u32 *flags = cp->shader_flags + cp->pipeline.shader_count; + *flags = BeamformerShaderFilterFlags_Demodulate; + if (f->parameters.complex) *flags |= BeamformerShaderFilterFlags_ComplexFilter; + if (!decode_first) *flags |= BeamformerShaderFilterFlags_MapChannels; b->data_kind = data_kind; if (decode_first) @@ -547,8 +551,9 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) cp->das_bake.time_offset += f->time_delay; b->filter_length = (u32)f->length; - b->shader_flags = 0; - if (f->parameters.complex) b->shader_flags |= BeamformerShaderFilterFlags_ComplexFilter; + u32 *flags = cp->shader_flags + cp->pipeline.shader_count; + *flags = 0; + if (f->parameters.complex) *flags |= BeamformerShaderFilterFlags_ComplexFilter; b->data_kind = data_kind; if (decode_first) @@ -560,6 +565,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) cp->das_bake.data_kind = BeamformerDataKind_Float32; if (demodulate || run_cuda_hilbert) cp->das_bake.data_kind = BeamformerDataKind_Float32Complex; + cp->shader_flags[cp->pipeline.shader_count] = das_flags; commit = 1; }break; default:{ commit = 1; }break; @@ -745,8 +751,6 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl for (i32 index = 0; index < header_vector_length; index++) 
stream_append_s8(&shader_stream, beamformer_shader_global_header_strings[header_vector[index]]); - stream_append_s8(&shader_stream, beamformer_shader_local_header_strings[reloadable_index]); - if (beamformer_shader_bake_parameter_counts[reloadable_index]) { i32 count = beamformer_shader_bake_parameter_counts[reloadable_index]; u32 *parameters = 0; @@ -768,6 +772,16 @@ load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_sl stream_append_hex_u64(&shader_stream, parameters[index]); stream_append_s8(&shader_stream, s8(")\n")); } + + stream_append_byte(&shader_stream, '\n'); + + s8 *flag_names = beamformer_shader_flag_strings[reloadable_index]; + u32 flag_count = beamformer_shader_flag_strings_count[reloadable_index]; + u32 flags = cp->shader_flags[shader_slot]; + for (u32 bit = 0; bit < flag_count; bit++) { + stream_append_s8s(&shader_stream, s8("#define "), flag_names[bit], + (flags & (1 << bit))? s8(" 1") : s8(" 0"), s8("\n")); + } } stream_append_s8(&shader_stream, s8("\n#line 1\n")); @@ -911,10 +925,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame case BeamformerShaderKind_Filter: case BeamformerShaderKind_Demodulate: { - BeamformerShaderFilterBakeParameters *b = &cp->filter_bake; - if (shader == BeamformerShaderKind_Demodulate) b = &cp->demodulate_bake; - - b32 map_channels = (b->shader_flags & BeamformerShaderFilterFlags_MapChannels) != 0; + b32 map_channels = (cp->shader_flags[shader_slot] & BeamformerShaderFilterFlags_MapChannels) != 0; glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[output_ssbo_idx]); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cp->filters[sp->filter_slot].ssbo); @@ -945,7 +956,7 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame case BeamformerShaderKind_DAS:{ local_persist u32 das_cycle_t = 0; - u32 local_flags = cp->das_bake.shader_flags; + u32 local_flags = cp->shader_flags[shader_slot]; b32 fast = (local_flags & 
BeamformerShaderDASFlags_Fast) != 0; b32 sparse = (local_flags & BeamformerShaderDASFlags_Sparse) != 0; diff --git a/beamformer.h b/beamformer.h @@ -181,6 +181,8 @@ struct BeamformerComputePlan { BeamformerShaderFilterBakeParameters filter_bake; BeamformerShaderDASBakeParameters das_bake; + u32 shader_flags[BeamformerMaxComputeShaderStages]; + BeamformerComputePlan *next; }; diff --git a/beamformer.meta b/beamformer.meta @@ -59,7 +59,6 @@ @BakeInt(OutputChannelStride output_channel_stride ) @BakeInt(OutputSampleStride output_sample_stride ) @BakeInt(OutputTransmitStride output_transmit_stride) - @BakeInt(ShaderFlags shader_flags ) @BakeInt(TransmitCount transmit_count ) } } @@ -81,7 +80,6 @@ @BakeInt(OutputChannelStride output_channel_stride ) @BakeInt(OutputSampleStride output_sample_stride ) @BakeInt(OutputTransmitStride output_transmit_stride) - @BakeInt(ShaderFlags shader_flags ) @BakeInt(SamplingMode sampling_mode ) @BakeFloat(DemodulationFrequency demodulation_frequency) @BakeFloat(SamplingFrequency sampling_frequency ) @@ -103,7 +101,6 @@ @BakeInt(ChannelCount channel_count ) @BakeInt(DataKind data_kind ) @BakeInt(SampleCount sample_count ) - @BakeInt(ShaderFlags shader_flags ) @BakeInt(AcquisitionKind acquisition_kind ) @BakeFloat(DemodulationFrequency demodulation_frequency) @BakeFloat(FNumber f_number ) diff --git a/build.c b/build.c @@ -4,7 +4,7 @@ /* TODO(rnp): * [ ]: refactor: merge pack_table and bake_parameters * [ ]: refactor: allow @Expand to come before the table definition - * [ ]: refactor: helper for appending expanded shader flags + * [x]: refactor: helper for appending expanded shader flags * [ ]: refactor: "base" shaders should only be reloadable shaders * - internally when a shader with no file is encountered it should * not get pushed as a "base" shader. 
@@ -2038,7 +2038,7 @@ meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx) } meta_end_scope(m, s8("};\n")); - meta_begin_scope(m, s8("read_only global s8 beamformer_shader_local_header_strings[] = {")); + meta_begin_scope(m, s8("read_only global s8 *beamformer_shader_flag_strings[] = {")); for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { if (ctx->base_shaders.data[shader].file.len == 0) continue; @@ -2046,15 +2046,28 @@ meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx) s8_list *flag_list = ctx->flags_for_shader.data + s->flag_list_id; if (flag_list->count) { - meta_push_line(m, s8("s8_comp(\"\"")); - metagen_push_counted_enum_body(m, s8("ShaderFlags_"), s8("\"#define "), s8("(1 << "), s8(")\\n\""), - flag_list->data, flag_list->count); - meta_push_line(m, s8("\"\\n\"),")); + meta_begin_scope(m, s8("(s8 []){")); + for (iz flag = 0; flag < flag_list->count; flag++) + meta_push_line(m, s8("s8_comp(\""), flag_list->data[flag], s8("\"),")); + meta_end_scope(m, s8("},")); } else { - meta_push_line(m, s8("{0},")); + meta_push_line(m, s8("0,")); } } meta_end_scope(m, s8("};\n")); + + meta_begin_scope(m, s8("read_only global u8 beamformer_shader_flag_strings_count[] = {")); + for (iz shader = 0; shader < ctx->base_shaders.count; shader++) { + if (ctx->base_shaders.data[shader].file.len == 0) continue; + + MetaShader *s = ctx->base_shaders.data[shader].shader; + s8_list *flag_list = ctx->flags_for_shader.data + s->flag_list_id; + + meta_indent(m); + meta_push_u64(m, (u64)flag_list->count); + meta_end_line(m, s8(",")); + } + meta_end_scope(m, s8("};\n")); } function b32 diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -92,10 +92,9 @@ typedef union { u32 output_channel_stride; u32 output_sample_stride; u32 output_transmit_stride; - u32 shader_flags; u32 transmit_count; }; - u32 E[10]; + u32 E[9]; } BeamformerShaderDecodeBakeParameters; typedef union { @@ -109,12 +108,11 @@ typedef union { u32 
output_channel_stride; u32 output_sample_stride; u32 output_transmit_stride; - u32 shader_flags; u32 sampling_mode; f32 demodulation_frequency; f32 sampling_frequency; }; - u32 E[13]; + u32 E[12]; } BeamformerShaderFilterBakeParameters; typedef union { @@ -123,7 +121,6 @@ typedef union { u32 channel_count; u32 data_kind; u32 sample_count; - u32 shader_flags; u32 acquisition_kind; f32 demodulation_frequency; f32 f_number; @@ -131,7 +128,7 @@ typedef union { f32 speed_of_sound; f32 time_offset; }; - u32 E[11]; + u32 E[10]; } BeamformerShaderDASBakeParameters; read_only global s8 beamformer_shader_names[] = { @@ -223,25 +220,34 @@ read_only global s8 beamformer_shader_global_header_strings[] = { "\n"), }; -read_only global s8 beamformer_shader_local_header_strings[] = { - s8_comp("" - "#define ShaderFlags_DilateOutput (1 << 0)\n" - "\n"), - s8_comp("" - "#define ShaderFlags_ComplexFilter (1 << 0)\n" - "#define ShaderFlags_MapChannels (1 << 1)\n" - "#define ShaderFlags_Demodulate (1 << 2)\n" - "\n"), - s8_comp("" - "#define ShaderFlags_Fast (1 << 0)\n" - "#define ShaderFlags_Sparse (1 << 1)\n" - "#define ShaderFlags_Interpolate (1 << 2)\n" - "#define ShaderFlags_CoherencyWeighting (1 << 3)\n" - "#define ShaderFlags_ReceiveOnly (1 << 4)\n" - "\n"), - {0}, - {0}, - {0}, +read_only global s8 *beamformer_shader_flag_strings[] = { + (s8 []){ + s8_comp("DilateOutput"), + }, + (s8 []){ + s8_comp("ComplexFilter"), + s8_comp("MapChannels"), + s8_comp("Demodulate"), + }, + (s8 []){ + s8_comp("Fast"), + s8_comp("Sparse"), + s8_comp("Interpolate"), + s8_comp("CoherencyWeighting"), + s8_comp("ReceiveOnly"), + }, + 0, + 0, + 0, +}; + +read_only global u8 beamformer_shader_flag_strings_count[] = { + 1, + 3, + 5, + 0, + 0, + 0, }; read_only global i32 *beamformer_shader_header_vectors[] = { @@ -272,7 +278,6 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { s8_comp("OutputChannelStride"), s8_comp("OutputSampleStride"), s8_comp("OutputTransmitStride"), - 
s8_comp("ShaderFlags"), s8_comp("TransmitCount"), }, (s8 []){ @@ -285,7 +290,6 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { s8_comp("OutputChannelStride"), s8_comp("OutputSampleStride"), s8_comp("OutputTransmitStride"), - s8_comp("ShaderFlags"), s8_comp("SamplingMode"), s8_comp("DemodulationFrequency"), s8_comp("SamplingFrequency"), @@ -295,7 +299,6 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { s8_comp("ChannelCount"), s8_comp("DataKind"), s8_comp("SampleCount"), - s8_comp("ShaderFlags"), s8_comp("AcquisitionKind"), s8_comp("DemodulationFrequency"), s8_comp("FNumber"), @@ -309,18 +312,18 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = { }; read_only global u8 *beamformer_shader_bake_parameter_is_float[] = { - (u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - (u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - (u8 []){0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1}, + (u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0}, + (u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, + (u8 []){0, 0, 0, 0, 0, 1, 1, 1, 1, 1}, 0, 0, 0, }; read_only global i32 beamformer_shader_bake_parameter_counts[] = { + 9, + 12, 10, - 13, - 11, 0, 0, 0, diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -4,7 +4,7 @@ #define TEXTURE_KIND r32f #define RESULT_TYPE_CAST(a) (a).x #define OUTPUT_TYPE_CAST(a) vec4((a).x, 0, 0, 0) - #if (ShaderFlags & ShaderFlags_Fast) == 0 + #if !Fast #define RESULT_TYPE vec2 #define RESULT_LAST_INDEX 1 #endif @@ -13,7 +13,7 @@ #define TEXTURE_KIND rg32f #define RESULT_TYPE_CAST(a) (a).xy #define OUTPUT_TYPE_CAST(a) vec4((a).xy, 0, 0) - #if (ShaderFlags & ShaderFlags_Fast) == 0 + #if !Fast #define RESULT_TYPE vec3 #define RESULT_LAST_INDEX 2 #endif @@ -29,22 +29,12 @@ layout(std430, binding = 1) readonly restrict buffer buffer_1 { #define RESULT_TYPE SAMPLE_TYPE #endif -#if (ShaderFlags & ShaderFlags_Fast) +#if Fast #define RESULT_STORE(a, length_a) RESULT_TYPE(a) + layout(TEXTURE_KIND, binding = 0) restrict uniform image3D u_out_data_tex; #else 
#define RESULT_STORE(a, length_a) RESULT_TYPE(a, length_a) -#endif - -const bool fast = bool(ShaderFlags & ShaderFlags_Fast); -const bool sparse = bool(ShaderFlags & ShaderFlags_Sparse); -const bool interpolate = bool(ShaderFlags & ShaderFlags_Interpolate); -const bool coherency_weighting = bool(ShaderFlags & ShaderFlags_CoherencyWeighting); -const bool receive_only = bool(ShaderFlags & ShaderFlags_ReceiveOnly); - -#if (ShaderFlags & ShaderFlags_Fast) -layout(TEXTURE_KIND, binding = 0) restrict uniform image3D u_out_data_tex; -#else -layout(TEXTURE_KIND, binding = 0) writeonly restrict uniform image3D u_out_data_tex; + layout(TEXTURE_KIND, binding = 0) writeonly restrict uniform image3D u_out_data_tex; #endif layout(r16i, binding = 1) readonly restrict uniform iimage1D sparse_elements; @@ -105,10 +95,10 @@ SAMPLE_TYPE cubic(const int base_index, const float index) SAMPLE_TYPE sample_rf(const int channel, const int transmit, const float index) { - SAMPLE_TYPE result = SAMPLE_TYPE(index >= 0.0f) * SAMPLE_TYPE((int(index) + 1 + int(interpolate)) < SampleCount); + SAMPLE_TYPE result = SAMPLE_TYPE(index >= 0.0f) * SAMPLE_TYPE((int(index) + 1 + Interpolate) < SampleCount); int base_index = int(channel * SampleCount * AcquisitionCount + transmit * SampleCount); - if (interpolate) result *= cubic(base_index, index); - else result *= rf_data[base_index + int(round(index))]; + if (bool(Interpolate)) result *= cubic(base_index, index); + else result *= rf_data[base_index + int(round(index))]; result = rotate_iq(result, index / SamplingFrequency); return result; } @@ -153,7 +143,7 @@ float cylindrical_wave_transmit_distance(const vec3 point, const float focal_dep float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const int transmit_receive_orientation) { float result = 0; - if (!receive_only) { + if (!bool(ReceiveOnly)) { bool tx_rows = (transmit_receive_orientation & TX_ORIENTATION_MASK) == 0; float transmit_angle = radians(focal_vector.x); float 
focal_depth = focal_vector.y; @@ -169,8 +159,8 @@ float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, con RESULT_TYPE RCA(const vec3 world_point) { - const int acquisition_start = fast? u_channel : 0; - const int acquisition_end = fast? u_channel + 1 : AcquisitionCount; + const int acquisition_start = bool(Fast)? u_channel : 0; + const int acquisition_end = bool(Fast)? u_channel + 1 : AcquisitionCount; RESULT_TYPE result = RESULT_TYPE(0); for (int acquisition = acquisition_start; acquisition < acquisition_end; acquisition++) { int transmit_receive_orientation = imageLoad(transmit_receive_orientations, acquisition).x; @@ -196,8 +186,8 @@ RESULT_TYPE RCA(const vec3 world_point) RESULT_TYPE HERCULES(const vec3 world_point) { - const int rx_channel_start = fast? u_channel : 0; - const int rx_channel_end = fast? u_channel + 1 : ChannelCount; + const int rx_channel_start = bool(Fast)? u_channel : 0; + const int rx_channel_end = bool(Fast)? u_channel + 1 : ChannelCount; int transmit_receive_orientation = imageLoad(transmit_receive_orientations, 0).x; vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; @@ -206,8 +196,8 @@ RESULT_TYPE HERCULES(const vec3 world_point) transmit_receive_orientation); RESULT_TYPE result = RESULT_TYPE(0); - for (int transmit = int(sparse); transmit < AcquisitionCount; transmit++) { - int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit; + for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) { + int tx_channel = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit; for (int rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) { vec3 element_position; if (rx_cols) element_position = vec3(rx_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0); @@ -230,8 +220,8 @@ RESULT_TYPE HERCULES(const vec3 world_point) RESULT_TYPE FORCES(const vec3 world_point) { - const int rx_channel_start = fast? 
u_channel : 0; - const int rx_channel_end = fast? u_channel + 1 : ChannelCount; + const int rx_channel_start = bool(Fast)? u_channel : 0; + const int rx_channel_end = bool(Fast)? u_channel + 1 : ChannelCount; RESULT_TYPE result = RESULT_TYPE(0); vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; @@ -240,8 +230,8 @@ RESULT_TYPE FORCES(const vec3 world_point) float apodization = apodize(FNumber * radians(180) / abs(xdc_world_point.z) * (xdc_world_point.x - rx_channel * xdc_element_pitch.x)); if (apodization > 0) { - for (int transmit = int(sparse); transmit < AcquisitionCount; transmit++) { - int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit; + for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) { + int tx_channel = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit; vec3 transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(ChannelCount / 2)), 0); float sidx = sample_index(distance(xdc_world_point, transmit_center) + receive_distance); @@ -259,7 +249,7 @@ void main() if (!all(lessThan(out_voxel, imageSize(u_out_data_tex)))) return; -#if (ShaderFlags & ShaderFlags_Fast) +#if Fast RESULT_TYPE sum = RESULT_TYPE_CAST(imageLoad(u_out_data_tex, out_voxel)); #else RESULT_TYPE sum = RESULT_TYPE(0); @@ -288,12 +278,10 @@ void main() }break; } - #if (ShaderFlags & ShaderFlags_Fast) == 0 - /* TODO(rnp): scale such that brightness remains ~constant */ - if (coherency_weighting) { + #if CoherencyWeighting + /* TODO(rnp): scale such that brightness remains ~constant */ float denominator = sum[RESULT_LAST_INDEX] + float(sum[RESULT_LAST_INDEX] == 0); RESULT_TYPE_CAST(sum) *= RESULT_TYPE_CAST(sum) / denominator; - } #endif imageStore(u_out_data_tex, out_voxel, OUTPUT_TYPE_CAST(sum)); diff --git a/shaders/decode.glsl b/shaders/decode.glsl @@ -23,7 +23,7 @@ #elif DataKind == DataKind_Int16 #define INPUT_DATA_TYPE int #define RF_SAMPLES_PER_INDEX 2 - #if (ShaderFlags & 
ShaderFlags_DilateOutput) + #if DilateOutput #define SAMPLE_DATA_TYPE vec4 #define SAMPLE_TYPE_CAST(x) vec4(((x) << 16) >> 16, 0, (x) >> 16, 0) #else diff --git a/shaders/filter.glsl b/shaders/filter.glsl @@ -9,7 +9,7 @@ #define SAMPLE_TYPE_CAST(v) unpackSnorm2x16(v) #endif -#if (ShaderFlags & ShaderFlags_ComplexFilter) +#if ComplexFilter #define FILTER_TYPE vec2 #define apply_filter(iq, h) complex_mul((iq), (h)) #else @@ -31,8 +31,6 @@ layout(std430, binding = 3) readonly restrict buffer buffer_3 { layout(r16i, binding = 1) readonly restrict uniform iimage1D channel_mapping; -const bool map_channels = (ShaderFlags & ShaderFlags_MapChannels) != 0; - vec2 complex_mul(vec2 a, vec2 b) { mat2 m = mat2(b.x, b.y, -b.y, b.x); @@ -40,7 +38,7 @@ vec2 complex_mul(vec2 a, vec2 b) return result; } -#if (ShaderFlags & ShaderFlags_Demodulate) +#if Demodulate vec2 rotate_iq(vec2 iq, int index) { vec2 result; @@ -84,14 +82,14 @@ void main() uint channel = gl_GlobalInvocationID.y; uint transmit = gl_GlobalInvocationID.z; - uint in_channel = map_channels ? imageLoad(channel_mapping, int(channel)).x : channel; + uint in_channel = bool(MapChannels) ? imageLoad(channel_mapping, int(channel)).x : channel; uint in_offset = InputChannelStride * in_channel + InputTransmitStride * transmit; uint out_offset = OutputChannelStride * channel + OutputTransmitStride * transmit + OutputSampleStride * out_sample; int target; - if (map_channels) { + if (bool(MapChannels)) { target = OutputChannelStride / OutputSampleStride; } else { target = OutputTransmitStride; @@ -104,12 +102,12 @@ void main() int a_length = target; int index = int(in_sample); - const float scale = bool(ShaderFlags & ShaderFlags_ComplexFilter) ? 1 : sqrt(2); + const float scale = bool(ComplexFilter) ? 
1 : sqrt(2); for (int j = max(0, index - FilterLength); j < min(index, a_length); j++) { vec2 iq = sample_rf(in_offset + j); FILTER_TYPE h = filter_coefficients[index - j]; - #if (ShaderFlags & ShaderFlags_Demodulate) + #if Demodulate result += scale * apply_filter(rotate_iq(iq * vec2(1, -1), -j), h); #else result += apply_filter(iq, h);