Commit: f98319eadb856d31bd0b976d53d6a1a7fa6dfd32
Parent: dfd834c4c23e90108067ee642f2be1a3592669c4
Author: Randy Palamar
Date: Wed, 8 Oct 2025 09:09:26 -0600
lib & shaders/das: add special case for single focal vector and orientation
using this with HERCULES yields a ~4% performance boost which
could be nice for volume reconstruction
Diffstat:
6 files changed, 102 insertions(+), 40 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -453,11 +453,17 @@ das_ubo_from_beamformer_parameters(BeamformerComputePlan *cp, BeamformerDASUBO *
cp->das_bake.sample_count = bp->sample_count;
cp->das_bake.channel_count = bp->channel_count;
cp->das_bake.acquisition_count = bp->acquisition_count;
+ cp->das_bake.transmit_angle = bp->focal_vector[0];
+ cp->das_bake.focus_depth = bp->focal_vector[1];
+ cp->das_bake.transmit_receive_orientation = bp->transmit_receive_orientation;
u32 result = 0;
if (bp->coherency_weighting) result |= BeamformerShaderDASFlags_CoherencyWeighting;
else result |= BeamformerShaderDASFlags_Fast;
+ if (bp->single_focus) result |= BeamformerShaderDASFlags_SingleFocus;
+ if (bp->single_orientation) result |= BeamformerShaderDASFlags_SingleOrientation;
+
if (bp->das_shader_id == BeamformerAcquisitionKind_UFORCES || bp->das_shader_id == BeamformerAcquisitionKind_UHERCULES)
result |= BeamformerShaderDASFlags_Sparse;
@@ -589,7 +595,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
u32 input_channel_stride = pb->parameters.raw_data_dimensions[0];
BeamformerShaderDecodeBakeParameters *dp = &cp->decode_bake;
- dp->decode_mode = pb->parameters.decode;
+ dp->decode_mode = pb->parameters.decode_mode;
dp->transmit_count = cp->das_bake.acquisition_count;
dp->input_sample_stride = decode_first? input_sample_stride : cp->das_bake.acquisition_count;
diff --git a/beamformer.meta b/beamformer.meta
@@ -93,20 +93,24 @@
@Enumeration(AcquisitionKind)
@Enumeration(DataKind)
@Enumeration(RCAOrientation)
- @Flags([Fast Sparse Interpolate CoherencyWeighting ReceiveOnly])
+ @Flags([Fast Sparse Interpolate CoherencyWeighting ReceiveOnly SingleFocus SingleOrientation])
@Bake
{
- @BakeInt(AcquisitionCount acquisition_count)
- @BakeInt(ChannelCount channel_count )
- @BakeInt(DataKind data_kind )
- @BakeInt(SampleCount sample_count )
- @BakeInt(AcquisitionKind acquisition_kind )
+ @BakeInt(AcquisitionCount acquisition_count )
+ @BakeInt(AcquisitionKind acquisition_kind )
+ @BakeInt(ChannelCount channel_count )
+ @BakeInt(DataKind data_kind )
+ @BakeInt(SampleCount sample_count )
+ @BakeInt(TransmitReceiveOrientation transmit_receive_orientation)
+
@BakeFloat(DemodulationFrequency demodulation_frequency)
@BakeFloat(FNumber f_number )
+ @BakeFloat(FocusDepth focus_depth )
@BakeFloat(SamplingFrequency sampling_frequency )
@BakeFloat(SpeedOfSound speed_of_sound )
@BakeFloat(TimeOffset time_offset )
+ @BakeFloat(TransmitAngle transmit_angle )
}
}
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -55,16 +55,20 @@ typedef enum {BEAMFORMER_CONSTANTS_LIST} BeamformerConstants;
/* X(name, type, size, matlab_type, elements, comment) */
#define BEAMFORMER_PARAMS_HEAD \
- X(xdc_transform, float, [16], single, 16, "IMPORTANT: column major order") \
- X(xdc_element_pitch, float, [2], single, 2, "[m] Transducer Element Pitch {row, col}") \
- X(raw_data_dimensions, uint32_t, [2], uint32, 2, "Raw Data Dimensions") \
- X(sample_count, uint32_t, , uint32, 1, "") \
- X(channel_count, uint32_t, , uint32, 1, "") \
- X(acquisition_count, uint32_t, , uint32, 1, "") \
- X(das_shader_id, uint32_t, , uint32, 1, "") \
- X(time_offset, float, , single, 1, "pulse length correction time [s]") \
- X(decode, uint16_t, , uint16, 1, "Decode or just reshape data") \
- X(sampling_mode, uint16_t, , uint16, 1, "")
+ X(xdc_transform, float, [16], single, 16, "IMPORTANT: column major order") \
+ X(xdc_element_pitch, float, [2], single, 2, "[m] Transducer Element Pitch {row, col}") \
+ X(raw_data_dimensions, uint32_t, [2], uint32, 2, "Raw Data Dimensions") \
+ X(focal_vector, float, [2], single, 2, "[degree, m] focal point {angle, depth}") \
+ X(transmit_receive_orientation, uint32_t, , uint32, 1, "") \
+ X(sample_count, uint32_t, , uint32, 1, "") \
+ X(channel_count, uint32_t, , uint32, 1, "") \
+ X(acquisition_count, uint32_t, , uint32, 1, "") \
+ X(das_shader_id, uint32_t, , uint32, 1, "") \
+ X(time_offset, float, , single, 1, "pulse length correction time [s]") \
+ X(single_focus, uint8_t, , uint8, 1, "") \
+ X(single_orientation, uint8_t, , uint8, 1, "") \
+ X(decode_mode, uint8_t, , uint8, 1, "") \
+ X(sampling_mode, uint8_t, , uint8, 1, "")
#define BEAMFORMER_UI_PARAMS \
X(output_min_coordinate, float, [3], single, 3, "[m] Back-Top-Left corner of output region") \
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -60,6 +60,8 @@ typedef enum {
BeamformerShaderDASFlags_Interpolate = (1 << 2),
BeamformerShaderDASFlags_CoherencyWeighting = (1 << 3),
BeamformerShaderDASFlags_ReceiveOnly = (1 << 4),
+ BeamformerShaderDASFlags_SingleFocus = (1 << 5),
+ BeamformerShaderDASFlags_SingleOrientation = (1 << 6),
} BeamformerShaderDASFlags;
typedef enum {
@@ -118,17 +120,20 @@ typedef union {
typedef union {
struct {
u32 acquisition_count;
+ u32 acquisition_kind;
u32 channel_count;
u32 data_kind;
u32 sample_count;
- u32 acquisition_kind;
+ u32 transmit_receive_orientation;
f32 demodulation_frequency;
f32 f_number;
+ f32 focus_depth;
f32 sampling_frequency;
f32 speed_of_sound;
f32 time_offset;
+ f32 transmit_angle;
};
- u32 E[10];
+ u32 E[13];
} BeamformerShaderDASBakeParameters;
read_only global s8 beamformer_shader_names[] = {
@@ -235,6 +240,8 @@ read_only global s8 *beamformer_shader_flag_strings[] = {
s8_comp("Interpolate"),
s8_comp("CoherencyWeighting"),
s8_comp("ReceiveOnly"),
+ s8_comp("SingleFocus"),
+ s8_comp("SingleOrientation"),
},
0,
0,
@@ -244,7 +251,7 @@ read_only global s8 *beamformer_shader_flag_strings[] = {
read_only global u8 beamformer_shader_flag_strings_count[] = {
1,
3,
- 5,
+ 7,
0,
0,
0,
@@ -296,15 +303,18 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = {
},
(s8 []){
s8_comp("AcquisitionCount"),
+ s8_comp("AcquisitionKind"),
s8_comp("ChannelCount"),
s8_comp("DataKind"),
s8_comp("SampleCount"),
- s8_comp("AcquisitionKind"),
+ s8_comp("TransmitReceiveOrientation"),
s8_comp("DemodulationFrequency"),
s8_comp("FNumber"),
+ s8_comp("FocusDepth"),
s8_comp("SamplingFrequency"),
s8_comp("SpeedOfSound"),
s8_comp("TimeOffset"),
+ s8_comp("TransmitAngle"),
},
0,
0,
@@ -314,7 +324,7 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = {
read_only global u8 *beamformer_shader_bake_parameter_is_float[] = {
(u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0},
(u8 []){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1},
- (u8 []){0, 0, 0, 0, 0, 1, 1, 1, 1, 1},
+ (u8 []){0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1},
0,
0,
0,
@@ -323,7 +333,7 @@ read_only global u8 *beamformer_shader_bake_parameter_is_float[] = {
read_only global i32 beamformer_shader_bake_parameter_counts[] = {
9,
12,
- 10,
+ 13,
0,
0,
0,
diff --git a/shaders/das.glsl b/shaders/das.glsl
@@ -140,10 +140,22 @@ float cylindrical_wave_transmit_distance(const vec3 point, const float focal_dep
return distance(rca_plane_projection(point, tx_rows), f);
}
+int tx_rx_orientation_for_acquisition(const int acquisition)
+{
+ int result = bool(SingleOrientation) ? TransmitReceiveOrientation : imageLoad(transmit_receive_orientations, acquisition).x;
+ return result;
+}
+
+vec2 focal_vector_for_acquisition(const int acquisition)
+{
+ vec2 result = bool(SingleFocus) ? vec2(TransmitAngle, FocusDepth) : imageLoad(focal_vectors, acquisition).xy;
+ return result;
+}
+
float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const int transmit_receive_orientation)
{
float result = 0;
- if (!bool(ReceiveOnly)) {
+ #if !ReceiveOnly
bool tx_rows = (transmit_receive_orientation & TX_ORIENTATION_MASK) == 0;
float transmit_angle = radians(focal_vector.x);
float focal_depth = focal_vector.y;
@@ -153,7 +165,7 @@ float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, con
} else {
result = cylindrical_wave_transmit_distance(world_point, focal_depth, transmit_angle, tx_rows);
}
- }
+ #endif
return result;
}
@@ -163,11 +175,11 @@ RESULT_TYPE RCA(const vec3 world_point)
const int acquisition_end = bool(Fast)? u_channel + 1 : AcquisitionCount;
RESULT_TYPE result = RESULT_TYPE(0);
for (int acquisition = acquisition_start; acquisition < acquisition_end; acquisition++) {
- int transmit_receive_orientation = imageLoad(transmit_receive_orientations, acquisition).x;
- bool rx_rows = (transmit_receive_orientation & RX_ORIENTATION_MASK) == 0;
+ const int tx_rx_orientation = tx_rx_orientation_for_acquisition(acquisition);
+ const bool rx_rows = (tx_rx_orientation & RX_ORIENTATION_MASK) == 0;
+ const vec2 focal_vector = focal_vector_for_acquisition(acquisition);
vec2 xdc_world_point = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows);
- float transmit_distance = rca_transmit_distance(world_point, imageLoad(focal_vectors, acquisition).xy,
- transmit_receive_orientation);
+ float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation);
for (int rx_channel = 0; rx_channel < ChannelCount; rx_channel++) {
vec3 rx_center = vec3(rx_channel * xdc_element_pitch, 0);
@@ -189,11 +201,11 @@ RESULT_TYPE HERCULES(const vec3 world_point)
const int rx_channel_start = bool(Fast)? u_channel : 0;
const int rx_channel_end = bool(Fast)? u_channel + 1 : ChannelCount;
- int transmit_receive_orientation = imageLoad(transmit_receive_orientations, 0).x;
- vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
- bool rx_cols = (transmit_receive_orientation & RX_ORIENTATION_MASK) != 0;
- float transmit_distance = rca_transmit_distance(world_point, imageLoad(focal_vectors, 0).xy,
- transmit_receive_orientation);
+ const int tx_rx_orientation = tx_rx_orientation_for_acquisition(0);
+ const bool rx_cols = (tx_rx_orientation & RX_ORIENTATION_MASK) != 0;
+ const vec2 focal_vector = focal_vector_for_acquisition(0);
+ const float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation);
+ const vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
RESULT_TYPE result = RESULT_TYPE(0);
for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
diff --git a/tests/throughput.c b/tests/throughput.c
@@ -47,7 +47,7 @@ typedef struct {
f32 center_frequency;
f32 sampling_frequency;
f32 time_offset;
- i32 transmit_mode;
+ u32 transmit_mode;
} zemp_bp_v1;
global b32 g_should_exit;
@@ -205,7 +205,7 @@ beamformer_parameters_from_zemp_bp_v1(zemp_bp_v1 *zbp, BeamformerParameters *out
out->sample_count = zbp->decoded_data_dim[0];
out->channel_count = zbp->decoded_data_dim[1];
out->acquisition_count = zbp->decoded_data_dim[2];
- out->decode = (u8)zbp->decode_mode;
+ out->decode_mode = (u8)zbp->decode_mode;
out->das_shader_id = zbp->beamform_mode;
out->time_offset = zbp->time_offset;
out->sampling_frequency = zbp->sampling_frequency;
@@ -335,22 +335,41 @@ execute_study(s8 study, Arena arena, Stream path, Options *options)
bp.decimation_rate = 1;
bp.demodulation_frequency = bp.sampling_frequency / 4;
+ /* NOTE(rnp): v1 files didn't specify sampling mode. it was almost always 4X */
+ bp.sampling_mode = BeamformerSamplingMode_4X;
+
+ #if 0
BeamformerFilterParameters kaiser = {0};
kaiser.Kaiser.beta = 5.65f;
kaiser.Kaiser.cutoff_frequency = 2.0e6f;
kaiser.Kaiser.length = 36;
- f32 kaiser_parameters[sizeof(kaiser.Kaiser) / sizeof(f32)];
- mem_copy(kaiser_parameters, &kaiser.Kaiser, sizeof(kaiser.Kaiser));
- beamformer_create_filter(BeamformerFilterKind_Kaiser, kaiser_parameters,
- countof(kaiser_parameters), bp.sampling_frequency / 2, 0, 0, 0);
+ beamformer_create_filter(BeamformerFilterKind_Kaiser, (f32 *)&kaiser.Kaiser,
+ sizeof(kaiser.Kaiser) / sizeof(f32), bp.sampling_frequency / 2, 0, 0, 0);
+ beamformer_set_pipeline_stage_parameters(0, 0);
+ #endif
+
+ #if 1
+ BeamformerFilterParameters matched = {0};
+ typeof(matched.MatchedChirp) *mp = &matched.MatchedChirp;
+ mp->duration = 18e-6f;
+ mp->min_frequency = 2.9e6f - bp.demodulation_frequency;
+ mp->max_frequency = 6.0e6f - bp.demodulation_frequency;
+
+ bp.time_offset += mp->duration / 2;
+
+ beamformer_create_filter(BeamformerFilterKind_MatchedChirp, (f32 *)&matched.MatchedChirp,
+ sizeof(matched.MatchedChirp) / sizeof(f32), bp.sampling_frequency / 2,
+ 1, 0, 0);
beamformer_set_pipeline_stage_parameters(0, 0);
+ #endif
if (zbp->sparse_elements[0] == -1) {
for (i16 i = 0; i < countof(zbp->sparse_elements); i++)
zbp->sparse_elements[i] = i;
}
+ #if 1
{
alignas(64) v2 focal_vectors[BeamformerMaxChannelCount];
for (u32 i = 0; i < countof(focal_vectors); i++)
@@ -364,6 +383,13 @@ execute_study(s8 study, Arena arena, Stream path, Options *options)
beamformer_push_transmit_receive_orientations(transmit_receive_orientations,
countof(transmit_receive_orientations));
}
+ #else
+ bp.single_focus = 1;
+ bp.single_orientation = 1;
+ bp.transmit_receive_orientation = zbp->transmit_mode;
+ bp.focal_vector[0] = zbp->transmit_angles[0];
+ bp.focal_vector[1] = zbp->focal_depths[0];
+ #endif
beamformer_push_channel_mapping(zbp->channel_mapping, countof(zbp->channel_mapping));
beamformer_push_sparse_elements(zbp->sparse_elements, countof(zbp->sparse_elements));