lib & shaders/das: add special case for single focal vector and orientation - ogl_beamforming - Ultrasound Beamforming Implemented with OpenGL

Commit: f98319eadb856d31bd0b976d53d6a1a7fa6dfd32
Parent: dfd834c4c23e90108067ee642f2be1a3592669c4
Author: Randy Palamar
Date:   Wed,  8 Oct 2025 09:09:26 -0600

lib & shaders/das: add special case for single focal vector and orientation

using this with HERCULES yields a ~4% performance boost which
could be nice for volume reconstruction

Diffstat:
M beamformer.c  | 8 +++++++-
M beamformer.meta  | 16 ++++++++++------
M beamformer_parameters.h  | 24 ++++++++++++++----------
M generated/beamformer.meta.c  | 22 ++++++++++++++++------
M shaders/das.glsl  | 34 +++++++++++++++++++++++-----------
M tests/throughput.c  | 38 ++++++++++++++++++++++++++++++++------

6 files changed, 102 insertions(+), 40 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -453,11 +453,17 @@ das_ubo_from_beamformer_parameters(BeamformerComputePlan *cp, BeamformerDASUBO *
 	cp->das_bake.sample_count           = bp->sample_count;
 	cp->das_bake.channel_count          = bp->channel_count;
 	cp->das_bake.acquisition_count      = bp->acquisition_count;
+	cp->das_bake.transmit_angle         = bp->focal_vector[0];
+	cp->das_bake.focus_depth            = bp->focal_vector[1];
+	cp->das_bake.transmit_receive_orientation = bp->transmit_receive_orientation;
 
 	u32 result = 0;
 	if (bp->coherency_weighting) result |= BeamformerShaderDASFlags_CoherencyWeighting;
 	else                         result |= BeamformerShaderDASFlags_Fast;
 
+	if (bp->single_focus)       result |= BeamformerShaderDASFlags_SingleFocus;
+	if (bp->single_orientation) result |= BeamformerShaderDASFlags_SingleOrientation;
+
 	if (bp->das_shader_id == BeamformerAcquisitionKind_UFORCES || bp->das_shader_id == BeamformerAcquisitionKind_UHERCULES)
 		result |= BeamformerShaderDASFlags_Sparse;
 
@@ -589,7 +595,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
 	u32 input_channel_stride  = pb->parameters.raw_data_dimensions[0];
 
 	BeamformerShaderDecodeBakeParameters *dp = &cp->decode_bake;
-	dp->decode_mode    = pb->parameters.decode;
+	dp->decode_mode    = pb->parameters.decode_mode;
 	dp->transmit_count = cp->das_bake.acquisition_count;
 
 	dp->input_sample_stride    = decode_first? input_sample_stride   : cp->das_bake.acquisition_count;
diff --git a/beamformer.meta b/beamformer.meta
@@ -93,20 +93,24 @@
 		@Enumeration(AcquisitionKind)
 		@Enumeration(DataKind)
 		@Enumeration(RCAOrientation)
-		@Flags([Fast Sparse Interpolate CoherencyWeighting ReceiveOnly])
+		@Flags([Fast Sparse Interpolate CoherencyWeighting ReceiveOnly SingleFocus SingleOrientation])
 
 		@Bake
 		{
-			@BakeInt(AcquisitionCount acquisition_count)
-			@BakeInt(ChannelCount     channel_count    )
-			@BakeInt(DataKind         data_kind        )
-			@BakeInt(SampleCount      sample_count     )
-			@BakeInt(AcquisitionKind  acquisition_kind )
+			@BakeInt(AcquisitionCount           acquisition_count           )
+			@BakeInt(AcquisitionKind            acquisition_kind            )
+			@BakeInt(ChannelCount               channel_count               )
+			@BakeInt(DataKind                   data_kind                   )
+			@BakeInt(SampleCount                sample_count                )
+			@BakeInt(TransmitReceiveOrientation transmit_receive_orientation)
+
 			@BakeFloat(DemodulationFrequency demodulation_frequency)
 			@BakeFloat(FNumber               f_number              )
+			@BakeFloat(FocusDepth            focus_depth           )
 			@BakeFloat(SamplingFrequency     sampling_frequency    )
 			@BakeFloat(SpeedOfSound          speed_of_sound        )
 			@BakeFloat(TimeOffset            time_offset           )
+			@BakeFloat(TransmitAngle         transmit_angle        )
 		}
 	}
 
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -55,16 +55,20 @@ typedef enum {BEAMFORMER_CONSTANTS_LIST} BeamformerConstants;
 
 /* X(name, type, size, matlab_type, elements, comment) */
 #define BEAMFORMER_PARAMS_HEAD \
-	X(xdc_transform,          float,    [16], single, 16, "IMPORTANT: column major order")           \
-	X(xdc_element_pitch,      float,     [2], single,  2, "[m] Transducer Element Pitch {row, col}") \
-	X(raw_data_dimensions,    uint32_t,  [2], uint32,  2, "Raw Data Dimensions")                     \
-	X(sample_count,           uint32_t,     , uint32,  1, "")                                        \
-	X(channel_count,          uint32_t,     , uint32,  1, "")                                        \
-	X(acquisition_count,      uint32_t,     , uint32,  1, "")                                        \
-	X(das_shader_id,          uint32_t,     , uint32,  1, "")                                        \
-	X(time_offset,            float,        , single,  1, "pulse length correction time [s]")        \
-	X(decode,                 uint16_t,     , uint16,  1, "Decode or just reshape data")             \
-	X(sampling_mode,          uint16_t,     , uint16,  1, "")
+	X(xdc_transform,                float,    [16], single, 16, "IMPORTANT: column major order")           \
+	X(xdc_element_pitch,            float,     [2], single,  2, "[m] Transducer Element Pitch {row, col}") \
+	X(raw_data_dimensions,          uint32_t,  [2], uint32,  2, "Raw Data Dimensions")                     \
+	X(focal_vector,                 float,     [2], single,  2, "[degree, m] focal point {angle, depth}")  \
+	X(transmit_receive_orientation, uint32_t,     , uint32,  1, "")                                        \
+	X(sample_count,                 uint32_t,     , uint32,  1, "")                                        \
+	X(channel_count,                uint32_t,     , uint32,  1, "")                                        \
+	X(acquisition_count,            uint32_t,     , uint32,  1, "")                                        \
+	X(das_shader_id,                uint32_t,     , uint32,  1, "")                                        \
+	X(time_offset,                  float,        , single,  1, "pulse length correction time [s]")        \
+	X(single_focus,                 uint8_t,      , uint8,   1, "")                                        \
+	X(single_orientation,           uint8_t,      , uint8,   1, "")                                        \
+	X(decode_mode,                  uint8_t,      , uint8,   1, "")                                        \
+	X(sampling_mode,                uint8_t,      , uint8,   1, "")
 
 #define BEAMFORMER_UI_PARAMS \
 	X(output_min_coordinate,  float,     [3], single, 3, "[m] Back-Top-Left corner of output region")                     \
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -60,6 +60,8 @@ typedef enum {
 	BeamformerShaderDASFlags_Interpolate        = (1 << 2),
 	BeamformerShaderDASFlags_CoherencyWeighting = (1 << 3),
 	BeamformerShaderDASFlags_ReceiveOnly        = (1 << 4),
+	BeamformerShaderDASFlags_SingleFocus        = (1 << 5),
+	BeamformerShaderDASFlags_SingleOrientation  = (1 << 6),
 } BeamformerShaderDASFlags;
 
 typedef enum {
@@ -118,17 +120,20 @@ typedef union {
 typedef union {
 	struct {
 		u32 acquisition_count;
+		u32 acquisition_kind;
 		u32 channel_count;
 		u32 data_kind;
 		u32 sample_count;
-		u32 acquisition_kind;
+		u32 transmit_receive_orientation;
 		f32 demodulation_frequency;
 		f32 f_number;
+		f32 focus_depth;
 		f32 sampling_frequency;
 		f32 speed_of_sound;
 		f32 time_offset;
+		f32 transmit_angle;
 	};
-	u32 E[10];
+	u32 E[13];
 } BeamformerShaderDASBakeParameters;
 
 read_only global s8 beamformer_shader_names[] = {
@@ -235,6 +240,8 @@ read_only global s8 *beamformer_shader_flag_strings[] = {
 		s8_comp("Interpolate"),
 		s8_comp("CoherencyWeighting"),
 		s8_comp("ReceiveOnly"),
+		s8_comp("SingleFocus"),
+		s8_comp("SingleOrientation"),
 	},
 	0,
 	0,
@@ -244,7 +251,7 @@ read_only global s8 *beamformer_shader_flag_strings[] = {
 read_only global u8 beamformer_shader_flag_strings_count[] = {
 	1,
 	3,
-	5,
+	7,
 	0,
 	0,
 	0,
@@ -296,15 +303,18 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = {
 	},
 	(s8 []){
 		s8_comp("AcquisitionCount"),
+		s8_comp("AcquisitionKind"),
 		s8_comp("ChannelCount"),
 		s8_comp("DataKind"),
 		s8_comp("SampleCount"),
-		s8_comp("AcquisitionKind"),
+		s8_comp("TransmitReceiveOrientation"),
 		s8_comp("DemodulationFrequency"),
 		s8_comp("FNumber"),
+		s8_comp("FocusDepth"),
 		s8_comp("SamplingFrequency"),
 		s8_comp("SpeedOfSound"),
 		s8_comp("TimeOffset"),
+		s8_comp("TransmitAngle"),
 	},
 	0,
 	0,
@@ -314,7 +324,7 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = {
 read_only global u8 *beamformer_shader_bake_parameter_is_float[] = {
 	(u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0},
 	(u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1},
-	(u8 []){0, 0, 0, 0, 0, 1, 1, 1, 1, 1},
+	(u8 []){0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1},
 	0,
 	0,
 	0,
@@ -323,7 +333,7 @@ read_only global u8 *beamformer_shader_bake_parameter_is_float[] = {
 read_only global i32 beamformer_shader_bake_parameter_counts[] = {
 	9,
 	12,
-	10,
+	13,
 	0,
 	0,
 	0,
diff --git a/shaders/das.glsl b/shaders/das.glsl
@@ -140,10 +140,22 @@ float cylindrical_wave_transmit_distance(const vec3 point, const float focal_dep
 	return distance(rca_plane_projection(point, tx_rows), f);
 }
 
+int tx_rx_orientation_for_acquisition(const int acquisition)
+{
+	int result = bool(SingleOrientation) ? TransmitReceiveOrientation : imageLoad(transmit_receive_orientations, acquisition).x;
+	return result;
+}
+
+vec2 focal_vector_for_acquisition(const int acquisition)
+{
+	vec2 result = bool(SingleFocus) ? vec2(TransmitAngle, FocusDepth) : imageLoad(focal_vectors, acquisition).xy;
+	return result;
+}
+
 float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const int transmit_receive_orientation)
 {
 	float result = 0;
-	if (!bool(ReceiveOnly)) {
+	#if !ReceiveOnly
 		bool  tx_rows        = (transmit_receive_orientation & TX_ORIENTATION_MASK) == 0;
 		float transmit_angle = radians(focal_vector.x);
 		float focal_depth    = focal_vector.y;
@@ -153,7 +165,7 @@ float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, con
 		} else {
 			result = cylindrical_wave_transmit_distance(world_point, focal_depth, transmit_angle, tx_rows);
 		}
-	}
+	#endif
 	return result;
 }
 
@@ -163,11 +175,11 @@ RESULT_TYPE RCA(const vec3 world_point)
 	const int acquisition_end   = bool(Fast)? u_channel + 1 : AcquisitionCount;
 	RESULT_TYPE result = RESULT_TYPE(0);
 	for (int acquisition = acquisition_start; acquisition < acquisition_end; acquisition++) {
-		int   transmit_receive_orientation = imageLoad(transmit_receive_orientations, acquisition).x;
-		bool  rx_rows           = (transmit_receive_orientation & RX_ORIENTATION_MASK) == 0;
+		const int  tx_rx_orientation = tx_rx_orientation_for_acquisition(acquisition);
+		const bool rx_rows           = (tx_rx_orientation & RX_ORIENTATION_MASK) == 0;
+		const vec2 focal_vector      = focal_vector_for_acquisition(acquisition);
 		vec2  xdc_world_point   = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows);
-		float transmit_distance = rca_transmit_distance(world_point, imageLoad(focal_vectors, acquisition).xy,
-		                                                transmit_receive_orientation);
+		float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation);
 
 		for (int rx_channel = 0; rx_channel < ChannelCount; rx_channel++) {
 			vec3  rx_center      = vec3(rx_channel * xdc_element_pitch, 0);
@@ -189,11 +201,11 @@ RESULT_TYPE HERCULES(const vec3 world_point)
 	const int rx_channel_start = bool(Fast)? u_channel     : 0;
 	const int rx_channel_end   = bool(Fast)? u_channel + 1 : ChannelCount;
 
-	int   transmit_receive_orientation = imageLoad(transmit_receive_orientations, 0).x;
-	vec3  xdc_world_point   = (xdc_transform * vec4(world_point, 1)).xyz;
-	bool  rx_cols           = (transmit_receive_orientation & RX_ORIENTATION_MASK) != 0;
-	float transmit_distance = rca_transmit_distance(world_point, imageLoad(focal_vectors, 0).xy,
-	                                                transmit_receive_orientation);
+	const int   tx_rx_orientation = tx_rx_orientation_for_acquisition(0);
+	const bool  rx_cols           = (tx_rx_orientation & RX_ORIENTATION_MASK) != 0;
+	const vec2  focal_vector      = focal_vector_for_acquisition(0);
+	const float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation);
+	const vec3  xdc_world_point   = (xdc_transform * vec4(world_point, 1)).xyz;
 
 	RESULT_TYPE result = RESULT_TYPE(0);
 	for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
diff --git a/tests/throughput.c b/tests/throughput.c
@@ -47,7 +47,7 @@ typedef struct {
 	f32 center_frequency;
 	f32 sampling_frequency;
 	f32 time_offset;
-	i32 transmit_mode;
+	u32 transmit_mode;
 } zemp_bp_v1;
 
 global b32 g_should_exit;
@@ -205,7 +205,7 @@ beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out
 	out->sample_count           = zbp->decoded_data_dim[0];
 	out->channel_count          = zbp->decoded_data_dim[1];
 	out->acquisition_count      = zbp->decoded_data_dim[2];
-	out->decode                 = (u8)zbp->decode_mode;
+	out->decode_mode            = (u8)zbp->decode_mode;
 	out->das_shader_id          = zbp->beamform_mode;
 	out->time_offset            = zbp->time_offset;
 	out->sampling_frequency     = zbp->sampling_frequency;
@@ -335,22 +335,41 @@ execute_study(s8 study, Arena arena, Stream path, Options *options)
 	bp.decimation_rate = 1;
 	bp.demodulation_frequency = bp.sampling_frequency / 4;
 
+	/* NOTE(rnp): v1 files didn't specify sampling mode. it was almost always 4X */
+	bp.sampling_mode = BeamformerSamplingMode_4X;
+
+	#if 0
 	BeamformerFilterParameters kaiser = {0};
 	kaiser.Kaiser.beta             = 5.65f;
 	kaiser.Kaiser.cutoff_frequency = 2.0e6f;
 	kaiser.Kaiser.length           = 36;
 
-	f32 kaiser_parameters[sizeof(kaiser.Kaiser) / sizeof(f32)];
-	mem_copy(kaiser_parameters, &kaiser.Kaiser, sizeof(kaiser.Kaiser));
-	beamformer_create_filter(BeamformerFilterKind_Kaiser, kaiser_parameters,
-	                         countof(kaiser_parameters), bp.sampling_frequency / 2, 0, 0, 0);
+	beamformer_create_filter(BeamformerFilterKind_Kaiser, (f32 *)&kaiser.Kaiser,
+	                         sizeof(kaiser.Kaiser) / sizeof(f32), bp.sampling_frequency / 2, 0, 0, 0);
+	beamformer_set_pipeline_stage_parameters(0, 0);
+	#endif
+
+	#if 1
+	BeamformerFilterParameters matched = {0};
+	typeof(matched.MatchedChirp) *mp = &matched.MatchedChirp;
+	mp->duration      = 18e-6f;
+	mp->min_frequency = 2.9e6f - bp.demodulation_frequency;
+	mp->max_frequency = 6.0e6f - bp.demodulation_frequency;
+
+	bp.time_offset += mp->duration / 2;
+
+	beamformer_create_filter(BeamformerFilterKind_MatchedChirp, (f32 *)&matched.MatchedChirp,
+	                         sizeof(matched.MatchedChirp) / sizeof(f32), bp.sampling_frequency / 2,
+	                         1, 0, 0);
 	beamformer_set_pipeline_stage_parameters(0, 0);
+	#endif
 
 	if (zbp->sparse_elements[0] == -1) {
 		for (i16 i = 0; i < countof(zbp->sparse_elements); i++)
 			zbp->sparse_elements[i] = i;
 	}
 
+	#if 1
 	{
 		alignas(64) v2 focal_vectors[BeamformerMaxChannelCount];
 		for (u32 i = 0; i < countof(focal_vectors); i++)
@@ -364,6 +383,13 @@ execute_study(s8 study, Arena arena, Stream path, Options *options)
 		beamformer_push_transmit_receive_orientations(transmit_receive_orientations,
 		                                              countof(transmit_receive_orientations));
 	}
+	#else
+	bp.single_focus       = 1;
+	bp.single_orientation = 1;
+	bp.transmit_receive_orientation = zbp->transmit_mode;
+	bp.focal_vector[0] = zbp->transmit_angles[0];
+	bp.focal_vector[1] = zbp->focal_depths[0];
+	#endif
 
 	beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping));
 	beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements));

M beamformer.c  | 8 +++++++-
M beamformer.meta  | 16 ++++++++++------
M beamformer_parameters.h  | 24 ++++++++++++++----------
M generated/beamformer.meta.c  | 22 ++++++++++++++++------
M shaders/das.glsl  | 34 +++++++++++++++++++++++-----------
M tests/throughput.c  | 38 ++++++++++++++++++++++++++++++++------