ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

das.glsl (11302B)


      1 /* See LICENSE for license details. */
      2 #if   DataKind == DataKind_Float32
      3   #define SAMPLE_TYPE           float
      4   #define TEXTURE_KIND          r32f
      5   #define RESULT_TYPE_CAST(a)   (a).x
      6   #define OUTPUT_TYPE_CAST(a)   vec4((a).x, 0, 0, 0)
      7   #if !Fast
      8     #define RESULT_TYPE         vec2
      9     #define RESULT_LAST_INDEX   1
     10   #endif
     11 #elif DataKind == DataKind_Float32Complex
     12   #define SAMPLE_TYPE           vec2
     13   #define TEXTURE_KIND          rg32f
     14   #define RESULT_TYPE_CAST(a)   (a).xy
     15   #define OUTPUT_TYPE_CAST(a)   vec4((a).xy, 0, 0)
     16   #if !Fast
     17     #define RESULT_TYPE         vec3
     18     #define RESULT_LAST_INDEX   2
     19   #endif
     20 #else
     21   #error DataKind unsupported for DAS
     22 #endif
     23 
     24 layout(std430, binding = 1) readonly restrict buffer buffer_1 {
     25 	SAMPLE_TYPE rf_data[];
     26 };
     27 
     28 #ifndef RESULT_TYPE
     29   #define RESULT_TYPE SAMPLE_TYPE
     30 #endif
     31 
     32 #if Fast
     33   #define RESULT_STORE(a, length_a) RESULT_TYPE(a)
     34 	layout(TEXTURE_KIND, binding = 0)           restrict uniform image3D  u_out_data_tex;
     35 #else
     36   #define RESULT_STORE(a, length_a) RESULT_TYPE(a, length_a)
     37 	layout(TEXTURE_KIND, binding = 0) writeonly restrict uniform image3D  u_out_data_tex;
     38 #endif
     39 
     40 layout(r16i,  binding = 1) readonly  restrict uniform iimage1D sparse_elements;
     41 layout(rg32f, binding = 2) readonly  restrict uniform image1D  focal_vectors;
     42 layout(r8i,   binding = 3) readonly  restrict uniform iimage1D transmit_receive_orientations;
     43 
     44 #define RX_ORIENTATION(tx_rx) (((tx_rx) >> 0) & 0x0F)
     45 #define TX_ORIENTATION(tx_rx) (((tx_rx) >> 4) & 0x0F)
     46 
     47 #define C_SPLINE 0.5
     48 
     49 #if DataKind == DataKind_Float32Complex
     50 vec2 rotate_iq(const vec2 iq, const float time)
     51 {
     52 	float arg    = radians(360) * DemodulationFrequency * time;
     53 	mat2  phasor = mat2( cos(arg), sin(arg),
     54 	                    -sin(arg), cos(arg));
     55 	vec2 result = phasor * iq;
     56 	return result;
     57 }
     58 #else
     59   #define rotate_iq(a, b) (a)
     60 #endif
     61 
     62 /* NOTE: See: https://cubic.org/docs/hermite.htm */
     63 SAMPLE_TYPE cubic(const int base_index, const float index)
     64 {
     65 	const mat4 h = mat4(
     66 		 2, -3,  0, 1,
     67 		-2,  3,  0, 0,
     68 		 1, -2,  1, 0,
     69 		 1, -1,  0, 0
     70 	);
     71 
     72 	float tk, t = modf(index, tk);
     73 	SAMPLE_TYPE samples[4] = {
     74 		rf_data[base_index + int(tk) - 1],
     75 		rf_data[base_index + int(tk) + 0],
     76 		rf_data[base_index + int(tk) + 1],
     77 		rf_data[base_index + int(tk) + 2],
     78 	};
     79 
     80 	vec4        S  = vec4(t * t * t, t * t, t, 1);
     81 	SAMPLE_TYPE P1 = samples[1];
     82 	SAMPLE_TYPE P2 = samples[2];
     83 	SAMPLE_TYPE T1 = C_SPLINE * (P2 - samples[0]);
     84 	SAMPLE_TYPE T2 = C_SPLINE * (samples[3] - P1);
     85 
     86 #if   DataKind == DataKind_Float32
     87 	vec4 C = vec4(P1.x, P2.x, T1.x, T2.x);
     88 	float result = dot(S, h * C);
     89 #elif DataKind == DataKind_Float32Complex
     90 	mat2x4 C = mat2x4(vec4(P1.x, P2.x, T1.x, T2.x), vec4(P1.y, P2.y, T1.y, T2.y));
     91 	vec2 result = S * h * C;
     92 #endif
     93 	return result;
     94 }
     95 
     96 SAMPLE_TYPE sample_rf(const int channel, const int transmit, const float index)
     97 {
     98 	SAMPLE_TYPE result = SAMPLE_TYPE(0);
     99 	int base_index = int(channel * SampleCount * AcquisitionCount + transmit * SampleCount);
    100 	switch (InterpolationMode) {
    101 	case InterpolationMode_Nearest:{
    102 		if (index >= 0 && int(round(index)) < SampleCount)
    103 			result = rotate_iq(rf_data[base_index + int(round(index))], index / SamplingFrequency);
    104 	}break;
    105 	case InterpolationMode_Linear:{
    106 		if (index >= 0 && round(index) < SampleCount) {
    107 			float tk, t = modf(index, tk);
    108 			int n = base_index + int(tk);
    109 			result = (1 - t) * rf_data[n] + t * rf_data[n + 1];
    110 		}
    111 		result = rotate_iq(result, index / SamplingFrequency);
    112 	}break;
    113 	case InterpolationMode_Cubic:{
    114 		if (index >= 0 && (int(index) + 2) < SampleCount)
    115 			result = rotate_iq(cubic(base_index, index), index / SamplingFrequency);
    116 	}break;
    117 	}
    118 	return result;
    119 }
    120 
    121 float sample_index(const float distance)
    122 {
    123 	float  time = distance / SpeedOfSound + TimeOffset;
    124 	return time * SamplingFrequency;
    125 }
    126 
    127 float apodize(const float arg)
    128 {
    129 	/* IMPORTANT: do not move calculation of arg into this function. It will generate a
    130 	 * conditional move resulting in cos always being evaluated causing a slowdown */
    131 
    132 	/* NOTE: constant F# dynamic receive apodization. This is implemented as:
    133 	 *
    134 	 *                  /        |x_e - x_i|\
    135 	 *    a(x, z) = cos(F# * π * ----------- ) ^ 2
    136 	 *                  \        |z_e - z_i|/
    137 	 *
    138 	 * where x,z_e are transducer element positions and x,z_i are image positions. */
    139 	float a = cos(radians(180) * arg);
    140 	return a * a;
    141 }
    142 
    143 vec2 rca_plane_projection(const vec3 point, const bool rows)
    144 {
    145 	vec2 result = vec2(point[int(rows)], point[2]);
    146 	return result;
    147 }
    148 
    149 float plane_wave_transmit_distance(const vec3 point, const float transmit_angle, const bool tx_rows)
    150 {
    151 	return dot(rca_plane_projection(point, tx_rows), vec2(sin(transmit_angle), cos(transmit_angle)));
    152 }
    153 
    154 float cylindrical_wave_transmit_distance(const vec3 point, const float focal_depth,
    155                                          const float transmit_angle, const bool tx_rows)
    156 {
    157 	vec2 f = focal_depth * vec2(sin(transmit_angle), cos(transmit_angle));
    158 	return distance(rca_plane_projection(point, tx_rows), f);
    159 }
    160 
    161 int tx_rx_orientation_for_acquisition(const int acquisition)
    162 {
    163 	int result = bool(SingleOrientation) ? TransmitReceiveOrientation : imageLoad(transmit_receive_orientations, acquisition).x;
    164 	return result;
    165 }
    166 
    167 vec2 focal_vector_for_acquisition(const int acquisition)
    168 {
    169 	vec2 result = bool(SingleFocus) ? vec2(TransmitAngle, FocusDepth) : imageLoad(focal_vectors, acquisition).xy;
    170 	return result;
    171 }
    172 
    173 float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const int transmit_receive_orientation)
    174 {
    175 	float result = 0;
    176 	if (TX_ORIENTATION(transmit_receive_orientation) != RCAOrientation_None) {
    177 		bool  tx_rows        = TX_ORIENTATION(transmit_receive_orientation) == RCAOrientation_Rows;
    178 		float transmit_angle = radians(focal_vector.x);
    179 		float focal_depth    = focal_vector.y;
    180 
    181 		if (isinf(focal_depth)) {
    182 			result = plane_wave_transmit_distance(world_point, transmit_angle, tx_rows);
    183 		} else {
    184 			result = cylindrical_wave_transmit_distance(world_point, focal_depth, transmit_angle, tx_rows);
    185 		}
    186 	}
    187 	return result;
    188 }
    189 
    190 RESULT_TYPE RCA(const vec3 world_point)
    191 {
    192 	const int acquisition_start = bool(Fast)? u_channel     : 0;
    193 	const int acquisition_end   = bool(Fast)? u_channel + 1 : AcquisitionCount;
    194 	RESULT_TYPE result = RESULT_TYPE(0);
    195 	for (int acquisition = acquisition_start; acquisition < acquisition_end; acquisition++) {
    196 		const int  tx_rx_orientation = tx_rx_orientation_for_acquisition(acquisition);
    197 		const bool rx_rows           = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Rows;
    198 		const vec2 focal_vector      = focal_vector_for_acquisition(acquisition);
    199 		vec2  xdc_world_point   = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows);
    200 		float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation);
    201 
    202 		for (int rx_channel = 0; rx_channel < ChannelCount; rx_channel++) {
    203 			vec3  rx_center      = vec3(rx_channel * xdc_element_pitch, 0);
    204 			vec2  receive_vector = xdc_world_point - rca_plane_projection(rx_center, rx_rows);
    205 			float a_arg          = abs(FNumber * receive_vector.x / abs(xdc_world_point.y));
    206 
    207 			if (a_arg < 0.5f) {
    208 				float       sidx  = sample_index(transmit_distance + length(receive_vector));
    209 				SAMPLE_TYPE value = apodize(a_arg) * sample_rf(rx_channel, acquisition, sidx);
    210 				result += RESULT_STORE(value, length(value));
    211 			}
    212 		}
    213 	}
    214 	return result;
    215 }
    216 
    217 RESULT_TYPE HERCULES(const vec3 world_point)
    218 {
    219 	const int rx_channel_start = bool(Fast)? u_channel     : 0;
    220 	const int rx_channel_end   = bool(Fast)? u_channel + 1 : ChannelCount;
    221 
    222 	const int   tx_rx_orientation = tx_rx_orientation_for_acquisition(0);
    223 	const bool  rx_cols           = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Columns;
    224 	const vec2  focal_vector      = focal_vector_for_acquisition(0);
    225 	const float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation);
    226 	const vec3  xdc_world_point   = (xdc_transform * vec4(world_point, 1)).xyz;
    227 
    228 	RESULT_TYPE result = RESULT_TYPE(0);
    229 	for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
    230 		int tx_channel = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit;
    231 		for (int rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) {
    232 			vec3 element_position;
    233 			if (rx_cols) element_position = vec3(rx_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0);
    234 			else         element_position = vec3(tx_channel, rx_channel, 0) * vec3(xdc_element_pitch, 0);
    235 
    236 			float a_arg = abs(FNumber * distance(xdc_world_point.xy, element_position.xy) /
    237 			                  abs(xdc_world_point.z));
    238 			if (a_arg < 0.5f) {
    239 				float apodization = apodize(a_arg);
    240 				/* NOTE: tribal knowledge */
    241 				if (transmit == 0) apodization *= inversesqrt(AcquisitionCount);
    242 
    243 				float       sidx  = sample_index(transmit_distance + distance(xdc_world_point, element_position));
    244 				SAMPLE_TYPE value = apodization * sample_rf(rx_channel, transmit, sidx);
    245 				result += RESULT_STORE(value, length(value));
    246 			}
    247 		}
    248 	}
    249 	return result;
    250 }
    251 
    252 RESULT_TYPE FORCES(const vec3 world_point)
    253 {
    254 	const int rx_channel_start = bool(Fast)? u_channel     : 0;
    255 	const int rx_channel_end   = bool(Fast)? u_channel + 1 : ChannelCount;
    256 
    257 	RESULT_TYPE result = RESULT_TYPE(0);
    258 	vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
    259 	for (int rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) {
    260 		float receive_distance = distance(xdc_world_point.xz, vec2(rx_channel * xdc_element_pitch.x, 0));
    261 		float a_arg = abs(FNumber * (xdc_world_point.x - rx_channel * xdc_element_pitch.x) /
    262 		                  abs(xdc_world_point.z));
    263 		if (a_arg < 0.5f) {
    264 			float apodization = apodize(a_arg);
    265 			for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
    266 				int   tx_channel      = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit;
    267 				vec3  transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(ChannelCount / 2)), 0);
    268 
    269 				float       sidx  = sample_index(distance(xdc_world_point, transmit_center) + receive_distance);
    270 				SAMPLE_TYPE value = apodization * sample_rf(rx_channel, transmit, sidx);
    271 				result += RESULT_STORE(value, length(value));
    272 			}
    273 		}
    274 	}
    275 	return result;
    276 }
    277 
    278 void main()
    279 {
    280 	ivec3 out_voxel = ivec3(gl_GlobalInvocationID);
    281 	if (!all(lessThan(out_voxel, imageSize(u_out_data_tex))))
    282 		return;
    283 
    284 #if Fast
    285 	RESULT_TYPE sum = RESULT_TYPE_CAST(imageLoad(u_out_data_tex, out_voxel));
    286 #else
    287 	RESULT_TYPE sum = RESULT_TYPE(0);
    288 	out_voxel += u_voxel_offset;
    289 #endif
    290 
    291 	vec3 world_point = (voxel_transform * vec4(out_voxel, 1)).xyz;
    292 
    293 	switch (AcquisitionKind) {
    294 	case AcquisitionKind_FORCES:
    295 	case AcquisitionKind_UFORCES:
    296 	{
    297 		sum += FORCES(world_point);
    298 	}break;
    299 	case AcquisitionKind_HERCULES:
    300 	case AcquisitionKind_UHERCULES:
    301 	case AcquisitionKind_HERO_PA:
    302 	{
    303 		sum += HERCULES(world_point);
    304 	}break;
    305 	case AcquisitionKind_Flash:
    306 	case AcquisitionKind_RCA_TPW:
    307 	case AcquisitionKind_RCA_VLS:
    308 	{
    309 		sum += RCA(world_point);
    310 	}break;
    311 	}
    312 
    313 	#if CoherencyWeighting
    314 		/* TODO(rnp): scale such that brightness remains ~constant */
    315 		float denominator = sum[RESULT_LAST_INDEX] + float(sum[RESULT_LAST_INDEX] == 0);
    316 		RESULT_TYPE_CAST(sum) *= RESULT_TYPE_CAST(sum) / denominator;
    317 	#endif
    318 
    319 	imageStore(u_out_data_tex, out_voxel, OUTPUT_TYPE_CAST(sum));
    320 }