send raw rf data size and decoded size separately - ogl_beamforming - Ultrasound Beamforming Implemented with OpenGL

Commit: 3b3aec3f666b5eecc311a99b5f712bfa8431412e
Parent: 5d3f02b4e90c61bf644a4a13b9f4037ba08a7298
Author: Randy Palamar
Date:   Wed, 24 Jul 2024 12:47:28 -0600

send raw rf data size and decoded size separately

These are two separate sizes and they are both needed to do things correctly.

Diffstat:
M beamformer.c  | 34 ++++++++++++++++++----------------
M beamformer.h  | 3 ++-
M beamformer_parameters.h  | 6 +++---
M helpers/ogl_beamformer_lib.c  | 8 ++++----
M helpers/ogl_beamformer_lib.h  | 3 ++-
M shaders/hadamard.glsl  | 14 +++++++-------
M shaders/lpf.glsl  | 8 ++++----
M shaders/uforces.glsl  | 16 ++++++++--------

8 files changed, 48 insertions(+), 44 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -51,10 +51,12 @@ static void
 alloc_shader_storage(BeamformerCtx *ctx, Arena a)
 {
 	BeamformerParameters *bp = &ctx->params->raw;
-	uv4 rf_data_dim          = bp->rf_data_dim;
-	size rf_raw_size         = bp->channel_data_stride * rf_data_dim.y * rf_data_dim.z * sizeof(i16);
-	size rf_decoded_size     = rf_data_dim.x * rf_data_dim.y * rf_data_dim.z * sizeof(f32) * 2;
-	ctx->csctx.rf_data_dim   = rf_data_dim;
+	uv4 dec_data_dim         = bp->dec_data_dim;
+	uv2 rf_raw_dim           = bp->rf_raw_dim;
+	size rf_raw_size         = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
+	size rf_decoded_size     = dec_data_dim.x * dec_data_dim.y * dec_data_dim.z * sizeof(f32) * 2;
+	ctx->csctx.rf_raw_dim    = rf_raw_dim;
+	ctx->csctx.dec_data_dim  = dec_data_dim;
 
 	glDeleteBuffers(ARRAY_COUNT(ctx->csctx.rf_data_ssbos), ctx->csctx.rf_data_ssbos);
 	glDeleteBuffers(1, &ctx->csctx.raw_data_ssbo);
@@ -70,10 +72,10 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
 	}
 
 	/* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */
-	ctx->csctx.hadamard_dim = (uv2){.x = rf_data_dim.z, .y = rf_data_dim.z};
-	size hadamard_elements  = rf_data_dim.z * rf_data_dim.z;
+	ctx->csctx.hadamard_dim = (uv2){.x = dec_data_dim.z, .y = dec_data_dim.z};
+	size hadamard_elements  = dec_data_dim.z * dec_data_dim.z;
 	i32  *hadamard          = alloc(&a, i32, hadamard_elements);
-	fill_hadamard(hadamard, rf_data_dim.z);
+	fill_hadamard(hadamard, dec_data_dim.z);
 
 	rlUnloadShaderBuffer(ctx->csctx.hadamard_ssbo);
 	ctx->csctx.hadamard_ssbo = rlLoadShaderBuffer(hadamard_elements * sizeof(i32), hadamard,
@@ -96,9 +98,9 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo);
 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->hadamard_ssbo);
-		glDispatchCompute(ORONE(csctx->rf_data_dim.x / 32),
-		                  ORONE(csctx->rf_data_dim.y / 32),
-		                  ORONE(csctx->rf_data_dim.z));
+		glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
+		                  ORONE(csctx->dec_data_dim.y / 32),
+		                  ORONE(csctx->dec_data_dim.z));
 		csctx->last_active_ssbo_index = !csctx->last_active_ssbo_index;
 		break;
 	case CS_LPF:
@@ -108,9 +110,9 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
 		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->lpf_ssbo);
 		glUniform1i(csctx->lpf_order_id, csctx->lpf_order);
 		#endif
-		glDispatchCompute(ORONE(csctx->rf_data_dim.x / 32),
-		                  ORONE(csctx->rf_data_dim.y / 32),
-		                  ORONE(csctx->rf_data_dim.z));
+		glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
+		                  ORONE(csctx->dec_data_dim.y / 32),
+		                  ORONE(csctx->dec_data_dim.z));
 		csctx->last_active_ssbo_index = !csctx->last_active_ssbo_index;
 		break;
 	case CS_MIN_MAX:
@@ -405,7 +407,7 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
 	BeamformerParameters *bp = &ctx->params->raw;
 	/* NOTE: Check for and Load RF Data into GPU */
 	if (os_poll_pipe(ctx->data_pipe)) {
-		if (!uv4_equal(ctx->csctx.rf_data_dim, bp->rf_data_dim) || ctx->flags & ALLOC_SSBOS)
+		if (!uv4_equal(ctx->csctx.dec_data_dim, bp->dec_data_dim) || ctx->flags & ALLOC_SSBOS)
 			alloc_shader_storage(ctx, arena);
 		if (!uv4_equal(ctx->out_data_dim, bp->output_points) || ctx->flags & ALLOC_OUT_TEX)
 			alloc_output_image(ctx);
@@ -413,8 +415,8 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
 		glBindBuffer(GL_SHADER_STORAGE_BUFFER, ctx->csctx.raw_data_ssbo);
 		void *rf_data_buf = glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_WRITE_ONLY);
 		ASSERT(rf_data_buf);
-		uv4  rf_data_dim  = ctx->csctx.rf_data_dim;
-		size rf_raw_size  = rf_data_dim.x * rf_data_dim.y * rf_data_dim.z * sizeof(i16);
+		uv2  rf_raw_dim   = ctx->csctx.rf_raw_dim;
+		size rf_raw_size  = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
 		size rlen         = os_read_pipe_data(ctx->data_pipe, rf_data_buf, rf_raw_size);
 		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
 
diff --git a/beamformer.h b/beamformer.h
@@ -64,7 +64,8 @@ typedef struct {
 
 	u32 shared_ubo;
 
-	uv4 rf_data_dim;
+	uv4 dec_data_dim;
+	uv2 rf_raw_dim;
 	i32 out_data_tex_id;
 	i32 mip_view_tex_id;
 	i32 mips_level_id;
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -6,18 +6,18 @@ typedef struct {
 	u32 channel_mapping[256];   /* Transducer Channel to Verasonics Channel */
 	u32 uforces_channels[128];  /* Channels used for virtual UFORCES elements */
 	f32 lpf_coefficients[64];   /* Low Pass Filter Cofficients */
-	uv4 rf_data_dim;            /* Samples * Channels * Acquisitions; last element ignored */
+	uv4 dec_data_dim;           /* Samples * Channels * Acquisitions; last element ignored */
 	uv4 output_points;          /* Width * Height * Depth; last element ignored */
+	uv2 rf_raw_dim;             /* Raw Data Dimensions */
 	v2  output_min_xz;          /* [m] Top left corner of output region */
 	v2  output_max_xz;          /* [m] Bottom right corner of output region */
 	v2  xdc_min_xy;             /* [m] Min center of transducer elements */
 	v2  xdc_max_xy;             /* [m] Max center of transducer elements */
-	u32 channel_data_stride;    /* Data points between channels (samples * acq + padding) */
 	u32 channel_offset;         /* Offset into channel_mapping: 0 or 128 (rows or columns) */
 	u32 lpf_order;              /* Order of Low Pass Filter */
 	f32 speed_of_sound;         /* [m/s] */
 	f32 sampling_frequency;     /* [Hz]  */
 	f32 center_frequency;       /* [Hz]  */
 	f32 focal_depth;            /* [m]   */
-	f32 _pad[2];
+	f32 _pad[3];
 } BeamformerParameters;
diff --git a/helpers/ogl_beamformer_lib.c b/helpers/ogl_beamformer_lib.c
@@ -124,7 +124,7 @@ check_shared_memory(char *name)
 }
 
 void
-send_data(char *pipe_name, char *shm_name, i16 *data, uv4 data_dim)
+send_data(char *pipe_name, char *shm_name, i16 *data, uv2 data_dim)
 {
 	if (g_pipe.file == OS_INVALID_FILE) {
 		g_pipe = os_open_named_pipe(pipe_name);
@@ -136,9 +136,9 @@ send_data(char *pipe_name, char *shm_name, i16 *data, uv4 data_dim)
 
 	check_shared_memory(shm_name);
 	/* TODO: this probably needs a mutex around it if we want to change it here */
-	g_bp->raw.rf_data_dim = data_dim;
-	size data_size        = data_dim.x * data_dim.y * data_dim.z * sizeof(i16);
-	size written          = os_write_to_pipe(g_pipe, data, data_size);
+	g_bp->raw.rf_raw_dim = data_dim;
+	size data_size       = data_dim.x * data_dim.y * sizeof(i16);
+	size written         = os_write_to_pipe(g_pipe, data, data_size);
 	if (written != data_size)
 		mexWarnMsgIdAndTxt("ogl_beamformer:write_error",
 		                   "failed to write full data to pipe: wrote: %ld", written);
diff --git a/helpers/ogl_beamformer_lib.h b/helpers/ogl_beamformer_lib.h
@@ -20,9 +20,10 @@ typedef ptrdiff_t size;
 #endif
 
 typedef struct { f32 x, y; }       v2;
+typedef struct { u32 x, y; }       uv2;
 typedef struct { u32 x, y, z, w; } uv4;
 
 #include "../beamformer_parameters.h"
 
 LIB_FN void set_beamformer_parameters(char *shm_name, BeamformerParameters *);
-LIB_FN void send_data(char *pipe_name, char *shm_name, i16 *data, uv4 data_dim);
+LIB_FN void send_data(char *pipe_name, char *shm_name, i16 *data, uv2 data_dim);
diff --git a/shaders/hadamard.glsl b/shaders/hadamard.glsl
@@ -18,13 +18,13 @@ layout(std140, binding = 0) uniform parameters {
 	uvec4 channel_mapping[64];    /* Transducer Channel to Verasonics Channel */
 	uvec4 uforces_channels[32];   /* Channels used for virtual UFORCES elements */
 	vec4  lpf_coefficients[16];   /* Low Pass Filter Cofficients */
-	uvec4 rf_data_dim;            /* Samples * Channels * Acquisitions; last element ignored */
+	uvec4 dec_data_dim;           /* Samples * Channels * Acquisitions; last element ignored */
 	uvec4 output_points;          /* Width * Height * Depth; last element ignored */
+	uvec2 rf_raw_dim;             /* Raw Data Dimensions */
 	vec2  output_min_xz;          /* [m] Top left corner of output region */
 	vec2  output_max_xz;          /* [m] Bottom right corner of output region */
 	vec2  xdc_min_xy;             /* [m] Min center of transducer elements */
 	vec2  xdc_max_xy;             /* [m] Max center of transducer elements */
-	uint  channel_data_stride;    /* Data points between channels (samples * acq + padding) */
 	uint  channel_offset;         /* Offset into channel_mapping: 0 or 128 (rows or columns) */
 	uint  lpf_order;              /* Order of Low Pass Filter */
 	float speed_of_sound;         /* [m/s] */
@@ -45,10 +45,10 @@ void main()
 	uint acq         = gl_GlobalInvocationID.z;
 
 	/* NOTE: offset to get the correct column in hadamard matrix */
-	uint hoff = rf_data_dim.z * acq;
+	uint hoff = dec_data_dim.z * acq;
 
 	/* NOTE: offsets for storing the results in the output data */
-	uint out_off = rf_data_dim.x * rf_data_dim.y * acq + rf_data_dim.x * channel + time_sample;
+	uint out_off = dec_data_dim.x * dec_data_dim.y * acq + dec_data_dim.x * channel + time_sample;
 
 	uint ch_base_idx = (channel + channel_offset) / 4;
 	uint ch_sub_idx  = (channel + channel_offset) - ch_base_idx * 4;
@@ -56,8 +56,8 @@ void main()
 
 	/* NOTE: stride is the number of samples between acquistions; off is the
 	 * index of the first acquisition for this channel and time sample  */
-	uint rf_stride = rf_data_dim.x;
-	uint rf_off    = channel_data_stride * rf_channel + rf_data_dim.x * acq + time_sample;
+	uint rf_stride = dec_data_dim.x;
+	uint rf_off    = rf_raw_dim.x * rf_channel + dec_data_dim.x * acq + time_sample;
 
 	/* NOTE: rf_data index and stride considering the data is i16 not i32 */
 	uint ridx       = rf_off / 2;
@@ -71,7 +71,7 @@ void main()
 
 	/* NOTE: Compute N-D dot product */
 	int sum = 0;
-	for (int i = 0; i < rf_data_dim.z; i++) {
+	for (int i = 0; i < dec_data_dim.z; i++) {
 		int data = (rf_data[ridx] << lfs) >> 16;
 		sum  += hadamard[hoff + i] * data;
 		ridx += ridx_delta;
diff --git a/shaders/lpf.glsl b/shaders/lpf.glsl
@@ -18,13 +18,13 @@ layout(std140, binding = 0) uniform parameters {
 	uvec4 channel_mapping[64];    /* Transducer Channel to Verasonics Channel */
 	uvec4 uforces_channels[32];   /* Channels used for virtual UFORCES elements */
 	vec4  lpf_coefficients[16];   /* Low Pass Filter Cofficients */
-	uvec4 rf_data_dim;            /* Samples * Channels * Acquisitions; last element ignored */
+	uvec4 dec_data_dim;           /* Samples * Channels * Acquisitions; last element ignored */
 	uvec4 output_points;          /* Width * Height * Depth; last element ignored */
+	uvec2 rf_raw_dim;             /* Raw Data Dimensions */
 	vec2  output_min_xz;          /* [m] Top left corner of output region */
 	vec2  output_max_xz;          /* [m] Bottom right corner of output region */
 	vec2  xdc_min_xy;             /* [m] Min center of transducer elements */
 	vec2  xdc_max_xy;             /* [m] Max center of transducer elements */
-	uint  channel_data_stride;    /* Data points between channels (samples * acq + padding) */
 	uint  channel_offset;         /* Offset into channel_mapping: 0 or 128 (rows or columns) */
 	uint  lpf_order;              /* Order of Low Pass Filter */
 	float speed_of_sound;         /* [m/s] */
@@ -42,8 +42,8 @@ void main()
 	uint acq         = gl_GlobalInvocationID.z;
 
 	/* NOTE: offsets for storing the results in the output data */
-	uint stride = rf_data_dim.x * rf_data_dim.y;
-	uint off    = rf_data_dim.x * channel + time_sample;
+	uint stride = dec_data_dim.x * dec_data_dim.y;
+	uint off    = dec_data_dim.x * channel + time_sample;
 
 	vec2 sum = vec2(0);
 	for (int i = 0; i <= lpf_order; i++) {
diff --git a/shaders/uforces.glsl b/shaders/uforces.glsl
@@ -10,13 +10,13 @@ layout(std140, binding = 0) uniform parameters {
 	uvec4 channel_mapping[64];    /* Transducer Channel to Verasonics Channel */
 	uvec4 uforces_channels[32];   /* Channels used for virtual UFORCES elements */
 	vec4  lpf_coefficients[16];   /* Low Pass Filter Cofficients */
-	uvec4 rf_data_dim;            /* Samples * Channels * Acquisitions; last element ignored */
+	uvec4 dec_data_dim;           /* Samples * Channels * Acquisitions; last element ignored */
 	uvec4 output_points;          /* Width * Height * Depth; last element ignored */
+	uvec2 rf_raw_dim;             /* Raw Data Dimensions */
 	vec2  output_min_xz;          /* [m] Top left corner of output region */
 	vec2  output_max_xz;          /* [m] Bottom right corner of output region */
 	vec2  xdc_min_xy;             /* [m] Min center of transducer elements */
 	vec2  xdc_max_xy;             /* [m] Max center of transducer elements */
-	uint  channel_data_stride;    /* Data points between channels (samples * acq + padding) */
 	uint  channel_offset;         /* Offset into channel_mapping: 0 or 128 (rows or columns) */
 	uint  lpf_order;              /* Order of Low Pass Filter */
 	float speed_of_sound;         /* [m/s] */
@@ -72,16 +72,16 @@ void main()
 	);
 
 	float x      = image_point.x - xdc_min_xy.x;
-	float dx     = xdc_size.x / float(rf_data_dim.y);
+	float dx     = xdc_size.x / float(dec_data_dim.y);
 	float dzsign = sign(image_point.z - focal_depth);
 
 	vec2 sum = vec2(0);
 
 	float time_scale = radians(360) * center_frequency;
 
-	uint ridx = rf_data_dim.y * rf_data_dim.x;
+	uint ridx = dec_data_dim.y * dec_data_dim.x;
 	/* NOTE: skip first acquisition since its garbage */
-	for (uint i = 1; i < rf_data_dim.z; i++) {
+	for (uint i = 1; i < dec_data_dim.z; i++) {
 		uint base_idx = (i - 1) / 4;
 		uint sub_idx  = (i - 1) - base_idx * 4;
 
@@ -89,7 +89,7 @@ void main()
 		float transmit_dist = focal_depth + dzsign * distance(image_point, focal_point);
 
 		vec2 rdist = vec2(x, image_point.z);
-		for (uint j = 0; j < rf_data_dim.y; j++) {
+		for (uint j = 0; j < dec_data_dim.y; j++) {
 			float dist = transmit_dist + length(rdist);
 			float time = dist / speed_of_sound;
 
@@ -99,9 +99,9 @@ void main()
 			p.y     *= sin(time_scale * time);
 			sum     += p;
 			rdist.x -= dx;
-			ridx    += rf_data_dim.x;
+			ridx    += dec_data_dim.x;
 		}
-		ridx += rf_data_dim.y * rf_data_dim.x;
+		ridx += dec_data_dim.y * dec_data_dim.x;
 	}
 	float val = length(sum);
 	imageStore(u_out_data_tex, out_coord, vec4(val, val, 0, 0));

M	beamformer.c	\|	34	++++++++++++++++++----------------
M	beamformer.h	\|	3	++-
M	beamformer_parameters.h	\|	6	+++---
M	helpers/ogl_beamformer_lib.c	\|	8	++++----
M	helpers/ogl_beamformer_lib.h	\|	3	++-
M	shaders/hadamard.glsl	\|	14	+++++++-------
M	shaders/lpf.glsl	\|	8	++++----
M	shaders/uforces.glsl	\|	16	++++++++--------