ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

das.glsl (13703B)


      1 /* See LICENSE for license details. */
      2 #if   DataKind == DataKind_Float32
      3   #define SAMPLE_TYPE           float
      4   #define TEXTURE_KIND          r32f
      5   #define RESULT_TYPE_CAST(a)   (a).x
      6   #define OUTPUT_TYPE_CAST(a)   vec4((a).x, 0, 0, 0)
      7   #if (ShaderFlags & ShaderFlags_Fast)
      8     #define RESULT_TYPE         SAMPLE_TYPE
      9   #else
     10     #define RESULT_TYPE         vec2
     11     #define RESULT_LAST_INDEX   1
     12   #endif
     13 #elif DataKind == DataKind_Float32Complex
     14   #define SAMPLE_TYPE           vec2
     15   #define TEXTURE_KIND          rg32f
     16   #define RESULT_TYPE_CAST(a)   (a).xy
     17   #define OUTPUT_TYPE_CAST(a)   vec4((a).xy, 0, 0)
     18   #if (ShaderFlags & ShaderFlags_Fast)
     19     #define RESULT_TYPE         SAMPLE_TYPE
     20   #else
     21     #define RESULT_TYPE         vec3
     22     #define RESULT_LAST_INDEX   2
     23   #endif
     24 #else
     25   #error DataKind unsupported for DAS
     26 #endif
     27 
     28 layout(std430, binding = 1) readonly restrict buffer buffer_1 {
     29 	SAMPLE_TYPE rf_data[];
     30 };
     31 
     32 const bool sparse      = bool(ShaderFlags & ShaderFlags_Sparse);
     33 const bool interpolate = bool(ShaderFlags & ShaderFlags_Interpolate);
     34 
     35 #if (ShaderFlags & ShaderFlags_Fast)
     36 layout(TEXTURE_KIND, binding = 0)           restrict uniform image3D  u_out_data_tex;
     37 #else
     38 layout(TEXTURE_KIND, binding = 0) writeonly restrict uniform image3D  u_out_data_tex;
     39 #endif
     40 
     41 layout(r16i,  binding = 1) readonly  restrict uniform iimage1D sparse_elements;
     42 layout(rg32f, binding = 2) readonly  restrict uniform image1D  focal_vectors;
     43 
     44 #define C_SPLINE 0.5
     45 
     46 #if DataKind == DataKind_Float32Complex
     47 vec2 rotate_iq(vec2 iq, float time)
     48 {
     49 	float arg    = radians(360) * demodulation_frequency * time;
     50 	mat2  phasor = mat2( cos(arg), sin(arg),
     51 	                    -sin(arg), cos(arg));
     52 	vec2 result = phasor * iq;
     53 	return result;
     54 }
     55 #else
     56   #define rotate_iq(a, b) (a)
     57 #endif
     58 
     59 /* NOTE: See: https://cubic.org/docs/hermite.htm */
     60 SAMPLE_TYPE cubic(int base_index, float index)
     61 {
     62 	const mat4 h = mat4(
     63 		 2, -3,  0, 1,
     64 		-2,  3,  0, 0,
     65 		 1, -2,  1, 0,
     66 		 1, -1,  0, 0
     67 	);
     68 
     69 	float tk, t = modf(index, tk);
     70 	SAMPLE_TYPE samples[4] = {
     71 		rf_data[base_index + int(tk) - 1],
     72 		rf_data[base_index + int(tk) + 0],
     73 		rf_data[base_index + int(tk) + 1],
     74 		rf_data[base_index + int(tk) + 2],
     75 	};
     76 
     77 	vec4        S  = vec4(t * t * t, t * t, t, 1);
     78 	SAMPLE_TYPE P1 = samples[1];
     79 	SAMPLE_TYPE P2 = samples[2];
     80 	SAMPLE_TYPE T1 = C_SPLINE * (P2 - samples[0]);
     81 	SAMPLE_TYPE T2 = C_SPLINE * (samples[3] - P1);
     82 
     83 #if   DataKind == DataKind_Float32
     84 	vec4 C = vec4(P1.x, P2.x, T1.x, T2.x);
     85 	float result = dot(S, h * C);
     86 #elif DataKind == DataKind_Float32Complex
     87 	mat2x4 C = mat2x4(vec4(P1.x, P2.x, T1.x, T2.x), vec4(P1.y, P2.y, T1.y, T2.y));
     88 	vec2 result = S * h * C;
     89 #endif
     90 	return result;
     91 }
     92 
     93 SAMPLE_TYPE sample_rf(int channel, int transmit, float index)
     94 {
     95 	SAMPLE_TYPE result = SAMPLE_TYPE(index >= 0.0f) * SAMPLE_TYPE((int(index) + 1 + int(interpolate)) < sample_count);
     96 	int base_index = int(channel * sample_count * acquisition_count + transmit * sample_count);
     97 	if (interpolate) result *= cubic(base_index, index);
     98 	else             result *= rf_data[base_index + int(round(index))];
     99 	result = rotate_iq(result, index / sampling_frequency);
    100 	return result;
    101 }
    102 
    103 float sample_index(float distance)
    104 {
    105 	float  time = distance / speed_of_sound + time_offset;
    106 	return time * sampling_frequency;
    107 }
    108 
    109 float apodize(float arg)
    110 {
    111 	/* NOTE: used for constant F# dynamic receive apodization. This is implemented as:
    112 	 *
    113 	 *                  /        |x_e - x_i|\
    114 	 *    a(x, z) = cos(F# * π * ----------- ) ^ 2
    115 	 *                  \        |z_e - z_i|/
    116 	 *
    117 	 * where x,z_e are transducer element positions and x,z_i are image positions. */
    118 	float a = cos(clamp(abs(arg), 0, 0.25 * radians(360)));
    119 	return a * a;
    120 }
    121 
    122 vec2 rca_plane_projection(vec3 point, bool rows)
    123 {
    124 	vec2 result = vec2(point[int(rows)], point[2]);
    125 	return result;
    126 }
    127 
    128 float plane_wave_transmit_distance(vec3 point, float transmit_angle, bool tx_rows)
    129 {
    130 	return dot(rca_plane_projection(point, tx_rows), vec2(sin(transmit_angle), cos(transmit_angle)));
    131 }
    132 
    133 float cylindrical_wave_transmit_distance(vec3 point, float focal_depth, float transmit_angle, bool tx_rows)
    134 {
    135 	vec2 f = focal_depth * vec2(sin(transmit_angle), cos(transmit_angle));
    136 	return distance(rca_plane_projection(point, tx_rows), f);
    137 }
    138 
    139 #if (ShaderFlags & ShaderFlags_Fast)
    140 RESULT_TYPE RCA(vec3 world_point)
    141 {
    142 	bool  tx_rows         = bool((shader_flags & ShaderFlags_TxColumns) == 0);
    143 	bool  rx_rows         = bool((shader_flags & ShaderFlags_RxColumns) == 0);
    144 	vec2  xdc_world_point = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows);
    145 	vec2  focal_vector    = imageLoad(focal_vectors, u_channel).xy;
    146 	float transmit_angle  = radians(focal_vector.x);
    147 	float focal_depth     = focal_vector.y;
    148 
    149 	float transmit_distance;
    150 	if (isinf(focal_depth)) {
    151 		transmit_distance = plane_wave_transmit_distance(world_point, transmit_angle, tx_rows);
    152 	} else {
    153 		transmit_distance = cylindrical_wave_transmit_distance(world_point, focal_depth,
    154 		                                                       transmit_angle, tx_rows);
    155 	}
    156 
    157 	RESULT_TYPE result = RESULT_TYPE(0);
    158 	for (int channel = 0; channel < channel_count; channel++) {
    159 		vec2  receive_vector   = xdc_world_point - rca_plane_projection(vec3(channel * xdc_element_pitch, 0), rx_rows);
    160 		float receive_distance = length(receive_vector);
    161 		float apodization      = apodize(f_number * radians(180) / abs(xdc_world_point.y) * receive_vector.x);
    162 
    163 		if (apodization > 0) {
    164 			float sidx  = sample_index(transmit_distance + receive_distance);
    165 			result     += apodization * sample_rf(channel, u_channel, sidx);
    166 		}
    167 	}
    168 	return result;
    169 }
    170 #else
    171 RESULT_TYPE RCA(vec3 world_point)
    172 {
    173 	bool tx_rows         = bool((shader_flags & ShaderFlags_TxColumns) == 0);
    174 	bool rx_rows         = bool((shader_flags & ShaderFlags_RxColumns) == 0);
    175 	vec2 xdc_world_point = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows);
    176 
    177 	RESULT_TYPE result = RESULT_TYPE(0);
    178 	for (int transmit = 0; transmit < acquisition_count; transmit++) {
    179 		vec2  focal_vector   = imageLoad(focal_vectors, transmit).xy;
    180 		float transmit_angle = radians(focal_vector.x);
    181 		float focal_depth    = focal_vector.y;
    182 
    183 		float transmit_distance;
    184 		if (isinf(focal_depth)) {
    185 			transmit_distance = plane_wave_transmit_distance(world_point, transmit_angle, tx_rows);
    186 		} else {
    187 			transmit_distance = cylindrical_wave_transmit_distance(world_point, focal_depth,
    188 			                                                       transmit_angle, tx_rows);
    189 		}
    190 
    191 		for (int rx_channel = 0; rx_channel < channel_count; rx_channel++) {
    192 			vec3  rx_center      = vec3(rx_channel * xdc_element_pitch, 0);
    193 			vec2  receive_vector = xdc_world_point - rca_plane_projection(rx_center, rx_rows);
    194 			float apodization    = apodize(f_number * radians(180) / abs(xdc_world_point.y) * receive_vector.x);
    195 
    196 			if (apodization > 0) {
    197 				float       sidx  = sample_index(transmit_distance + length(receive_vector));
    198 				SAMPLE_TYPE value = apodization * sample_rf(rx_channel, transmit, sidx);
    199 				result += RESULT_TYPE(value, length(value));
    200 			}
    201 		}
    202 	}
    203 	return result;
    204 }
    205 #endif
    206 
    207 #if (ShaderFlags & ShaderFlags_Fast)
    208 RESULT_TYPE HERCULES(vec3 world_point)
    209 {
    210 	vec3  xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
    211 	bool  tx_rows         = bool((shader_flags & ShaderFlags_TxColumns) == 0);
    212 	bool  rx_cols         = bool((shader_flags & ShaderFlags_RxColumns));
    213 	vec2  focal_vector    = imageLoad(focal_vectors, 0).xy;
    214 	float transmit_angle  = radians(focal_vector.x);
    215 	float focal_depth     = focal_vector.y;
    216 
    217 	float transmit_distance;
    218 	if (isinf(focal_depth)) {
    219 		transmit_distance = plane_wave_transmit_distance(world_point, transmit_angle, tx_rows);
    220 	} else {
    221 		transmit_distance = cylindrical_wave_transmit_distance(world_point, focal_depth,
    222 		                                                       transmit_angle, tx_rows);
    223 	}
    224 
    225 	RESULT_TYPE result = RESULT_TYPE(0);
    226 	for (int transmit = int(sparse); transmit < acquisition_count; transmit++) {
    227 		int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit;
    228 		vec3 element_position;
    229 		if (rx_cols) element_position = vec3(u_channel,  tx_channel, 0) * vec3(xdc_element_pitch, 0);
    230 		else         element_position = vec3(tx_channel, u_channel,  0) * vec3(xdc_element_pitch, 0);
    231 
    232 		float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) *
    233 		                            distance(xdc_world_point.xy, element_position.xy));
    234 		if (apodization > 0) {
    235 			/* NOTE: tribal knowledge */
    236 			if (transmit == 0) apodization *= inversesqrt(acquisition_count);
    237 
    238 			float sidx  = sample_index(transmit_distance + distance(xdc_world_point, element_position));
    239 			result     += apodization * sample_rf(u_channel, transmit, sidx);
    240 		}
    241 	}
    242 	return result;
    243 }
    244 #else
    245 RESULT_TYPE HERCULES(vec3 world_point)
    246 {
    247 	vec3  xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
    248 	bool  tx_rows         = bool((shader_flags & ShaderFlags_TxColumns) == 0);
    249 	bool  rx_cols         = bool((shader_flags & ShaderFlags_RxColumns));
    250 	vec2  focal_vector    = imageLoad(focal_vectors, 0).xy;
    251 	float transmit_angle  = radians(focal_vector.x);
    252 	float focal_depth     = focal_vector.y;
    253 
    254 	float transmit_distance;
    255 	if (isinf(focal_depth)) {
    256 		transmit_distance = plane_wave_transmit_distance(world_point, transmit_angle, tx_rows);
    257 	} else {
    258 		transmit_distance = cylindrical_wave_transmit_distance(world_point, focal_depth,
    259 		                                                       transmit_angle, tx_rows);
    260 	}
    261 
    262 	RESULT_TYPE result = RESULT_TYPE(0);
    263 	for (int transmit = int(sparse); transmit < acquisition_count; transmit++) {
    264 		int tx_channel = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit;
    265 		for (int rx_channel = 0; rx_channel < channel_count; rx_channel++) {
    266 			vec3 element_position;
    267 			if (rx_cols) element_position = vec3(rx_channel, tx_channel, 0) * vec3(xdc_element_pitch, 0);
    268 			else         element_position = vec3(tx_channel, rx_channel, 0) * vec3(xdc_element_pitch, 0);
    269 
    270 			float apodization = apodize(f_number * radians(180) / abs(xdc_world_point.z) *
    271 			                            distance(xdc_world_point.xy, element_position.xy));
    272 			if (apodization > 0) {
    273 				/* NOTE: tribal knowledge */
    274 				if (transmit == 0) apodization *= inversesqrt(acquisition_count);
    275 
    276 				float       sidx  = sample_index(transmit_distance + distance(xdc_world_point, element_position));
    277 				SAMPLE_TYPE value = apodization * sample_rf(rx_channel, transmit, sidx);
    278 				result += RESULT_TYPE(value, length(value));
    279 			}
    280 		}
    281 	}
    282 	return result;
    283 }
    284 #endif
    285 
    286 #if (ShaderFlags & ShaderFlags_Fast)
    287 RESULT_TYPE FORCES(vec3 world_point)
    288 {
    289 	vec3  xdc_world_point  = (xdc_transform * vec4(world_point, 1)).xyz;
    290 	float receive_distance = distance(xdc_world_point.xz, vec2(u_channel * xdc_element_pitch.x, 0));
    291 	float apodization      = apodize(f_number * radians(180) / abs(xdc_world_point.z) *
    292 	                                 (xdc_world_point.x - u_channel * xdc_element_pitch.x));
    293 
    294 	RESULT_TYPE result = RESULT_TYPE(0);
    295 	if (apodization > 0) {
    296 		for (int transmit = int(sparse); transmit < acquisition_count; transmit++) {
    297 			int   tx_channel      = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit;
    298 			vec3  transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(channel_count / 2)), 0);
    299 
    300 			float sidx  = sample_index(distance(xdc_world_point, transmit_center) + receive_distance);
    301 			result     += apodization * sample_rf(u_channel, transmit, sidx);
    302 		}
    303 	}
    304 	return result;
    305 }
    306 #else
    307 RESULT_TYPE FORCES(vec3 world_point)
    308 {
    309 	vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
    310 
    311 	RESULT_TYPE result = RESULT_TYPE(0);
    312 	for (int rx_channel = 0; rx_channel < channel_count; rx_channel++) {
    313 		float receive_distance = distance(xdc_world_point.xz, vec2(rx_channel * xdc_element_pitch.x, 0));
    314 		float apodization      = apodize(f_number * radians(180) / abs(xdc_world_point.z) *
    315 		                                 (xdc_world_point.x - rx_channel * xdc_element_pitch.x));
    316 		if (apodization > 0) {
    317 			for (int transmit = int(sparse); transmit < acquisition_count; transmit++) {
    318 				int   tx_channel      = sparse ? imageLoad(sparse_elements, transmit - int(sparse)).x : transmit;
    319 				vec3  transmit_center = vec3(xdc_element_pitch * vec2(tx_channel, floor(channel_count / 2)), 0);
    320 
    321 				float       sidx  = sample_index(distance(xdc_world_point, transmit_center) + receive_distance);
    322 				SAMPLE_TYPE value = apodization * sample_rf(rx_channel, transmit, sidx);
    323 				result += RESULT_TYPE(value, length(value));
    324 			}
    325 		}
    326 	}
    327 	return result;
    328 }
    329 #endif
    330 
    331 void main()
    332 {
    333 	ivec3 out_voxel = ivec3(gl_GlobalInvocationID);
    334 	if (!all(lessThan(out_voxel, imageSize(u_out_data_tex))))
    335 		return;
    336 
    337 #if (ShaderFlags & ShaderFlags_Fast)
    338 	RESULT_TYPE sum = RESULT_TYPE_CAST(imageLoad(u_out_data_tex, out_voxel));
    339 #else
    340 	RESULT_TYPE sum = RESULT_TYPE(0);
    341 	out_voxel += u_voxel_offset;
    342 #endif
    343 
    344 	vec3 world_point = (voxel_transform * vec4(out_voxel, 1)).xyz;
    345 
    346 	switch (shader_kind) {
    347 	case ShaderKind_FORCES:
    348 	case ShaderKind_UFORCES:
    349 	{
    350 		sum += FORCES(world_point);
    351 	}break;
    352 	case ShaderKind_HERCULES:
    353 	case ShaderKind_UHERCULES:
    354 	{
    355 		sum += HERCULES(world_point);
    356 	}break;
    357 	case ShaderKind_Flash:
    358 	case ShaderKind_RCA_TPW:
    359 	case ShaderKind_RCA_VLS:
    360 	{
    361 		sum += RCA(world_point);
    362 	}break;
    363 	}
    364 
    365 	#if (ShaderFlags & ShaderFlags_Fast) == 0
    366 	/* TODO(rnp): scale such that brightness remains ~constant */
    367 	if (bool(shader_flags & ShaderFlags_CoherencyWeighting)) {
    368 		float denominator = sum[RESULT_LAST_INDEX] + float(sum[RESULT_LAST_INDEX] == 0);
    369 		RESULT_TYPE_CAST(sum) *= RESULT_TYPE_CAST(sum) / denominator;
    370 	}
    371 	#endif
    372 
    373 	imageStore(u_out_data_tex, out_voxel, OUTPUT_TYPE_CAST(sum));
    374 }