ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | LICENSE

beamformer.h (6489B)


      1 /* See LICENSE for license details. */
      2 #ifndef _BEAMFORMER_H_
      3 #define _BEAMFORMER_H_
      4 
      5 #include <immintrin.h>
      6 
      7 #include <glad.h>
      8 
      9 #define GRAPHICS_API_OPENGL_43
     10 #include <raylib.h>
     11 #include <rlgl.h>
     12 
     13 #include "util.h"
     14 
     15 #define BG_COLOUR              (v4){.r = 0.15, .g = 0.12, .b = 0.13, .a = 1.0}
     16 #define FG_COLOUR              (v4){.r = 0.92, .g = 0.88, .b = 0.78, .a = 1.0}
     17 #define FOCUSED_COLOUR         (v4){.r = 0.86, .g = 0.28, .b = 0.21, .a = 1.0}
     18 #define HOVERED_COLOUR         (v4){.r = 0.11, .g = 0.50, .b = 0.59, .a = 1.0}
     19 
     20 #define INFO_COLUMN_WIDTH      560
     21 /* NOTE: extra space used for allowing mouse clicks after end of text */
     22 #define TEXT_BOX_EXTRA_X       10.0f
     23 
     24 #define TEXT_HOVER_SPEED       5.0f
     25 
     26 #define RECT_BTN_COLOUR        (Color){.r = 0x43, .g = 0x36, .b = 0x3a, .a = 0xff}
     27 #define RECT_BTN_BORDER_COLOUR (Color){.r = 0x00, .g = 0x00, .b = 0x00, .a = 0xCC}
     28 #define RECT_BTN_ROUNDNESS     0.3f
     29 #define RECT_BTN_BORDER_WIDTH  6.0f
     30 
     31 typedef union {
     32 	struct { f32 x, y; };
     33 	struct { f32 w, h; };
     34 	f32 E[2];
     35 	Vector2 rl;
     36 } v2;
     37 
     38 typedef union {
     39 	struct { f32 x, y, z; };
     40 	struct { f32 w, h, d; };
     41 	f32 E[3];
     42 	Vector3 rl;
     43 } v3;
     44 
     45 typedef union {
     46 	struct { f32 x, y, z, w; };
     47 	struct { f32 r, g, b, a; };
     48 	struct { v3 xyz; f32 _1; };
     49 	struct { f32 _2; v3 yzw; };
     50 	struct { v2 xy, zw; };
     51 	f32 E[4];
     52 	Vector4 rl;
     53 } v4;
     54 
     55 typedef union {
     56 	struct { v2 pos, size; };
     57 	Rectangle rl;
     58 } Rect;
     59 
     60 enum program_flags {
     61 	RELOAD_SHADERS = 1 << 0,
     62 	GEN_MIPMAPS    = 1 << 29,
     63 	DO_COMPUTE     = 1 << 30,
     64 };
     65 
     66 enum gl_vendor_ids {
     67 	GL_VENDOR_AMD,
     68 	GL_VENDOR_INTEL,
     69 	GL_VENDOR_NVIDIA,
     70 };
     71 
     72 enum modifiable_value_flags {
     73 	MV_CAUSES_COMPUTE = 1 << 0,
     74 	MV_FLOAT          = 1 << 1,
     75 	MV_INT            = 1 << 2,
     76 	MV_GEN_MIPMAPS    = 1 << 29,
     77 	MV_POWER_OF_TWO   = 1 << 30,
     78 };
     79 typedef struct {
     80 	void *value;
     81 	u32   flags;
     82 	f32   scale;
     83 	union {v2 flimits; iv2 ilimits;};
     84 } BPModifiableValue;
     85 
     86 typedef struct {
     87 	char buf[64];
     88 	BPModifiableValue store;
     89 	i32  buf_len;
     90 	i32  cursor;
     91 	f32  cursor_hover_p;
     92 	f32  cursor_blink_t;
     93 	f32  cursor_blink_target;
     94 } InputState;
     95 
     96 #include "beamformer_parameters.h"
     97 typedef struct {
     98 	BeamformerParameters raw;
     99 	enum compute_shaders compute_stages[16];
    100 	u32                  compute_stages_count;
    101 	b32                  upload;
    102 } BeamformerParametersFull;
    103 
    104 #if defined(__unix__)
    105 	#include "os_unix.c"
    106 
    107 	#ifdef _DEBUG
    108 	#define DEBUG_EXPORT
    109 	#define OS_DEBUG_LIB_NAME      "./beamformer.so"
    110 	#define OS_DEBUG_LIB_TEMP_NAME "./beamformer_temp.so"
    111 	#else
    112 	#define DEBUG_EXPORT static
    113 	#endif
    114 
    115 	#define OS_CUDA_LIB_NAME      "./external/cuda_toolkit.so"
    116 	#define OS_CUDA_LIB_TEMP_NAME "./external/cuda_toolkit_temp.so"
    117 
    118 	#define OS_PIPE_NAME "/tmp/beamformer_data_fifo"
    119 	#define OS_SMEM_NAME "/ogl_beamformer_parameters"
    120 #elif defined(_WIN32)
    121 	#include "os_win32.c"
    122 
    123 	#ifdef _DEBUG
    124 	#define DEBUG_EXPORT __declspec(dllexport)
    125 	#define OS_DEBUG_LIB_NAME      "beamformer.dll"
    126 	#define OS_DEBUG_LIB_TEMP_NAME "beamformer_temp.dll"
    127 	#else
    128 	#define DEBUG_EXPORT static
    129 	#endif
    130 
    131 	#define OS_CUDA_LIB_NAME      "external\\cuda_toolkit.dll"
    132 	#define OS_CUDA_LIB_TEMP_NAME "external\\cuda_toolkit_temp.dll"
    133 
    134 	#define OS_PIPE_NAME "\\\\.\\pipe\\beamformer_data_fifo"
    135 	#define OS_SMEM_NAME "Local\\ogl_beamformer_parameters"
    136 #else
    137 	#error Unsupported Platform!
    138 #endif
    139 
    140 #define MAX_FRAMES_IN_FLIGHT 3
    141 
    142 #define INIT_CUDA_CONFIGURATION_FN(name) void name(u32 *input_dims, u32 *decoded_dims, u16 *channel_mapping, b32 rx_cols)
    143 typedef INIT_CUDA_CONFIGURATION_FN(init_cuda_configuration_fn);
    144 #define REGISTER_CUDA_BUFFERS_FN(name) void name(u32 *rf_data_ssbos, u32 rf_buffer_count, u32 raw_data_ssbo)
    145 typedef REGISTER_CUDA_BUFFERS_FN(register_cuda_buffers_fn);
    146 #define CUDA_DECODE_FN(name) void name(size_t input_offset, u32 output_buffer_idx)
    147 typedef CUDA_DECODE_FN(cuda_decode_fn);
    148 #define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx)
    149 typedef CUDA_HILBERT_FN(cuda_hilbert_fn);
    150 
    151 typedef struct {
    152 	os_library_handle           lib;
    153 	os_filetime                 timestamp;
    154 	init_cuda_configuration_fn *init_cuda_configuration;
    155 	register_cuda_buffers_fn   *register_cuda_buffers;
    156 	cuda_decode_fn             *cuda_decode;
    157 	cuda_hilbert_fn            *cuda_hilbert;
    158 } CudaLib;
    159 
    160 typedef struct {
    161 	u32 programs[CS_LAST];
    162 
    163 	u32    timer_index;
    164 	u32    timer_ids[MAX_FRAMES_IN_FLIGHT][CS_LAST];
    165 	GLsync timer_fences[MAX_FRAMES_IN_FLIGHT];
    166 	f32    last_frame_time[CS_LAST];
    167 
    168 	/* NOTE: the raw_data_ssbo is allocated at 3x the required size to allow for tiled
    169 	 * transfers when the GPU is running behind the CPU. It is not mapped on NVIDIA because
    170 	 * their drivers _will_ store the buffer in the system memory. This doesn't happen
    171 	 * for Intel or AMD and mapping the buffer is preferred. In either case incoming data can
    172 	 * be written to the arena at the appropriate offset for the current raw_data_index. An
    173 	 * additional BufferSubData is needed on NVIDIA to upload the data. */
    174 	GLsync raw_data_fences[MAX_FRAMES_IN_FLIGHT];
    175 	Arena  raw_data_arena;
    176 	u32    raw_data_ssbo;
    177 	u32    raw_data_index;
    178 
    179 	/* NOTE: Decoded data is only relevant in the context of a single frame. We use two
    180 	 * buffers so that they can be swapped when chaining multiple compute stages */
    181 	u32 rf_data_ssbos[2];
    182 	u32 last_output_ssbo_index;
    183 	u32 hadamard_ssbo;
    184 	uv2 hadamard_dim;
    185 
    186 	u32 shared_ubo;
    187 
    188 	uv4 dec_data_dim;
    189 	uv2 rf_raw_dim;
    190 	i32 out_data_tex_id;
    191 	i32 mip_view_tex_id;
    192 	i32 mips_level_id;
    193 	i32 volume_export_pass_id;
    194 	i32 volume_export_dim_offset_id;
    195 } ComputeShaderCtx;
    196 
    197 typedef struct {
    198 	Shader          shader;
    199 	RenderTexture2D output;
    200 	i32             out_data_tex_id;
    201 	i32             db_cutoff_id;
    202 	f32             db;
    203 } FragmentShaderCtx;
    204 
    205 enum export_state {
    206 	ES_START        = (1 <<  0),
    207 	ES_COMPUTING    = (1 <<  1),
    208 	ES_TIMER_ACTIVE = (1 <<  2),
    209 };
    210 
    211 typedef struct {
    212 	Arena volume_buf;
    213 	uv4   volume_dim;
    214 	u32   timer_ids[2];
    215 	f32   runtime;
    216 	u32   volume_texture;
    217 	i32   volume_texture_id;
    218 	u32   rf_data_ssbo;
    219 	u32   output_ssbo;
    220 	u32   state;
    221 	u32   dispatch_index;
    222 } ExportCtx;
    223 
    224 typedef struct {
    225 	uv2 window_size;
    226 	u32 flags;
    227 	enum gl_vendor_ids gl_vendor_id;
    228 
    229 	f32 dt;
    230 
    231 	/* UI Theming */
    232 	Font font;
    233 	Font small_font;
    234 
    235 	InputState is;
    236 
    237 	uv4 out_data_dim;
    238 	u32 out_texture;
    239 	u32 out_texture_unit;
    240 	u32 out_texture_mips;
    241 
    242 	ComputeShaderCtx  csctx;
    243 	FragmentShaderCtx fsctx;
    244 	ExportCtx         export_ctx;
    245 
    246 	os_pipe data_pipe;
    247 	u32     partial_transfer_count;
    248 
    249 	CudaLib cuda_lib;
    250 
    251 	BeamformerParametersFull *params;
    252 } BeamformerCtx;
    253 
    254 #endif /*_BEAMFORMER_H_ */