Commit: 51b580cf67ba027ff9a1b12942460dda838e3a70
Parent: 001e7f48eff42b37b1128fd85161f984276e14cc
Author: Randy Palamar
Date: Tue, 20 Aug 2024 14:22:59 -0600
move beat frequency multiplication to demod shader
This allows demodulation to be more easily disabled
Diffstat:
10 files changed, 73 insertions(+), 71 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -129,7 +129,7 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
csctx->raw_data_fences[csctx->raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
break;
- case CS_LPF:
+ case CS_DEMOD:
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -2,9 +2,9 @@
enum compute_shaders {
/* TODO: Probably this should be split up */
CS_CUDA_DECODE_AND_DEMOD = 0,
- CS_HADAMARD = 1,
- CS_HERCULES = 2,
- CS_LPF = 3,
+ CS_DEMOD = 1,
+ CS_HADAMARD = 2,
+ CS_HERCULES = 3,
CS_MIN_MAX = 4,
CS_UFORCES = 5,
CS_LAST
@@ -24,7 +24,7 @@ typedef struct {
v2 xdc_min_xy; /* [m] Min center of transducer elements */
v2 xdc_max_xy; /* [m] Max center of transducer elements */
u32 channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
- u32 lpf_order; /* Order of Low Pass Filter */
+ i32 lpf_order; /* Order of Low Pass Filter (-1 if disabled) */
f32 speed_of_sound; /* [m/s] */
f32 sampling_frequency; /* [Hz] */
f32 center_frequency; /* [Hz] */
diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c
@@ -141,9 +141,9 @@ set_beamformer_pipeline(char *shm_name, i32 *stages, i32 stages_count)
for (i32 i = 0; i < stages_count; i++) {
switch (stages[i]) {
case CS_CUDA_DECODE_AND_DEMOD:
+ case CS_DEMOD:
case CS_HADAMARD:
case CS_HERCULES:
- case CS_LPF:
case CS_MIN_MAX:
case CS_UFORCES:
g_bp->compute_stages[i] = stages[i];
diff --git a/main.c b/main.c
@@ -6,7 +6,7 @@ static os_library_handle g_cuda_lib_handle;
static char *compute_shader_paths[CS_LAST] = {
[CS_HADAMARD] = "shaders/hadamard.glsl",
[CS_HERCULES] = "shaders/2d_hercules.glsl",
- [CS_LPF] = "shaders/lpf.glsl",
+ [CS_DEMOD] = "shaders/demod.glsl",
[CS_MIN_MAX] = "shaders/min_max.glsl",
[CS_UFORCES] = "shaders/uforces.glsl",
};
@@ -182,7 +182,7 @@ main(void)
ctx.params->raw.output_points = ctx.out_data_dim;
/* NOTE: default compute shader pipeline */
ctx.params->compute_stages[0] = CS_HADAMARD;
- ctx.params->compute_stages[1] = CS_LPF;
+ ctx.params->compute_stages[1] = CS_DEMOD;
ctx.params->compute_stages[2] = CS_UFORCES;
ctx.params->compute_stages[3] = CS_MIN_MAX;
ctx.params->compute_stages_count = 4;
diff --git a/shaders/2d_hercules.glsl b/shaders/2d_hercules.glsl
@@ -18,7 +18,7 @@ layout(std140, binding = 0) uniform parameters{
vec2 xdc_min_xy; /* [m] Min center of transducer elements */
vec2 xdc_max_xy; /* [m] Max center of transducer elements */
uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
- uint lpf_order; /* Order of Low Pass Filter */
+ int lpf_order; /* Order of Low Pass Filter (-1 if disabled) */
float speed_of_sound; /* [m/s] */
float sampling_frequency; /* [Hz] */
float center_frequency; /* [Hz] */
diff --git a/shaders/demod.glsl b/shaders/demod.glsl
@@ -0,0 +1,59 @@
+/* See LICENSE for license details. */
+#version 460 core
+layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
+
+layout(std430, binding = 1) readonly restrict buffer buffer_1 {
+ vec2 in_data[];
+};
+
+layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
+ vec2 out_data[];
+};
+
+layout(std140, binding = 0) uniform parameters {
+ uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */
+ uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */
+ vec4 lpf_coefficients[16]; /* Low Pass Filter Cofficients */
+ uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
+ uvec4 output_points; /* Width * Height * Depth; last element ignored */
+ uvec2 rf_raw_dim; /* Raw Data Dimensions */
+ vec2 output_min_xz; /* [m] Top left corner of output region */
+ vec2 output_max_xz; /* [m] Bottom right corner of output region */
+ vec2 xdc_min_xy; /* [m] Min center of transducer elements */
+ vec2 xdc_max_xy; /* [m] Max center of transducer elements */
+ uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
+ int lpf_order; /* Order of Low Pass Filter (-1 if disabled) */
+ float speed_of_sound; /* [m/s] */
+ float sampling_frequency; /* [Hz] */
+ float center_frequency; /* [Hz] */
+ float focal_depth; /* [m] */
+ float time_offset; /* pulse length correction time [s] */
+ uint uforces; /* mode is UFORCES (1) or FORCES (0) */
+ float off_axis_pos; /* Where on the 3rd axis to render the image (Hercules only) */
+};
+
+void main()
+{
+ uint time_sample = gl_GlobalInvocationID.x;
+ uint channel = gl_GlobalInvocationID.y;
+ uint acq = gl_GlobalInvocationID.z;
+
+ /* NOTE: offsets for storing the results in the output data */
+ uint stride = dec_data_dim.x * dec_data_dim.y;
+ uint off = dec_data_dim.x * channel + stride * acq + time_sample;
+
+ /* NOTE: for calculating full-band I-Q data; needs to be stepped in loop */
+ float arg = radians(360) * center_frequency * time_sample / sampling_frequency;
+ float arg_delta = radians(360) * center_frequency / sampling_frequency;
+
+ vec2 sum = vec2(0);
+ for (int i = 0; i <= lpf_order; i++) {
+ vec2 data;
+ /* NOTE: make sure data samples come from the same acquisition */
+ if (time_sample >= i) data = in_data[off - i].xx * vec2(cos(arg), sin(arg));
+ else data = vec2(0);
+ sum += lpf_coefficients[i / 4][i % 4] * data;
+ arg -= arg_delta;
+ }
+ out_data[off] = sum;
+}
diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl
@@ -26,7 +26,7 @@ layout(std140, binding = 0) uniform parameters {
vec2 xdc_min_xy; /* [m] Min center of transducer elements */
vec2 xdc_max_xy; /* [m] Max center of transducer elements */
uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
- uint lpf_order; /* Order of Low Pass Filter */
+ int lpf_order; /* Order of Low Pass Filter (-1 if disabled) */
float speed_of_sound; /* [m/s] */
float sampling_frequency; /* [Hz] */
float center_frequency; /* [Hz] */
@@ -79,8 +79,5 @@ void main()
sum += hadamard[hoff + i] * data;
ridx += ridx_delta;
}
-
- float arg = radians(360) * center_frequency * time_sample / sampling_frequency;
- out_data[out_off].x = float(sum) * cos(arg);
- out_data[out_off].y = float(sum) * sin(arg);
+ out_data[out_off] = vec2(float(sum), 0);
}
diff --git a/shaders/lpf.glsl b/shaders/lpf.glsl
@@ -1,54 +0,0 @@
-/* See LICENSE for license details. */
-#version 460 core
-layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
-
-layout(std430, binding = 1) readonly restrict buffer buffer_1 {
- vec2 in_data[];
-};
-
-layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
- vec2 out_data[];
-};
-
-layout(std140, binding = 0) uniform parameters {
- uvec4 channel_mapping[64]; /* Transducer Channel to Verasonics Channel */
- uvec4 uforces_channels[32]; /* Channels used for virtual UFORCES elements */
- vec4 lpf_coefficients[16]; /* Low Pass Filter Cofficients */
- uvec4 dec_data_dim; /* Samples * Channels * Acquisitions; last element ignored */
- uvec4 output_points; /* Width * Height * Depth; last element ignored */
- uvec2 rf_raw_dim; /* Raw Data Dimensions */
- vec2 output_min_xz; /* [m] Top left corner of output region */
- vec2 output_max_xz; /* [m] Bottom right corner of output region */
- vec2 xdc_min_xy; /* [m] Min center of transducer elements */
- vec2 xdc_max_xy; /* [m] Max center of transducer elements */
- uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
- uint lpf_order; /* Order of Low Pass Filter */
- float speed_of_sound; /* [m/s] */
- float sampling_frequency; /* [Hz] */
- float center_frequency; /* [Hz] */
- float focal_depth; /* [m] */
- float time_offset; /* pulse length correction time [s] */
- uint uforces; /* mode is UFORCES (1) or FORCES (0) */
- float off_axis_pos; /* Where on the 3rd axis to render the image (Hercules only) */
-};
-
-void main()
-{
- uint time_sample = gl_GlobalInvocationID.x;
- uint channel = gl_GlobalInvocationID.y;
- uint acq = gl_GlobalInvocationID.z;
-
- /* NOTE: offsets for storing the results in the output data */
- uint stride = dec_data_dim.x * dec_data_dim.y;
- uint off = dec_data_dim.x * channel + stride * acq + time_sample;
-
- vec2 sum = vec2(0);
- for (int i = 0; i <= lpf_order; i++) {
- vec2 data;
- /* NOTE: make sure data samples come from the same acquisition */
- if (time_sample > i) data = in_data[off - i];
- else data = vec2(0);
- sum += lpf_coefficients[i / 4][i % 4] * data;
- }
- out_data[off] = sum;
-}
diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl
@@ -18,7 +18,7 @@ layout(std140, binding = 0) uniform parameters {
vec2 xdc_min_xy; /* [m] Min center of transducer elements */
vec2 xdc_max_xy; /* [m] Max center of transducer elements */
uint channel_offset; /* Offset into channel_mapping: 0 or 128 (rows or columns) */
- uint lpf_order; /* Order of Low Pass Filter */
+ int lpf_order; /* Order of Low Pass Filter (-1 if disabled) */
float speed_of_sound; /* [m/s] */
float sampling_frequency; /* [Hz] */
float center_frequency; /* [Hz] */
@@ -90,7 +90,7 @@ void main()
float dzsign = sign(image_point.z - focal_depth);
/* NOTE: offset correcting for both pulse length and low pass filtering */
- float time_correction = time_offset + (lpf_order + 1)/sampling_frequency;
+ float time_correction = time_offset + (lpf_order + 1) / sampling_frequency;
vec2 sum = vec2(0);
/* NOTE: skip first acquisition in uforces since its garbage */
diff --git a/ui.c b/ui.c
@@ -346,7 +346,7 @@ draw_debug_overlay(BeamformerCtx *ctx, Arena arena, Rect r)
[CS_CUDA_DECODE_AND_DEMOD] = s8("CUDA Decoding:"),
[CS_HADAMARD] = s8("Decoding:"),
[CS_HERCULES] = s8("HERCULES:"),
- [CS_LPF] = s8("LPF:"),
+ [CS_DEMOD] = s8("Demodulation:"),
[CS_MIN_MAX] = s8("Min/Max:"),
[CS_UFORCES] = s8("UFORCES:"),
};