ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

beamformer_shared_memory.c (9711B)


      1 /* See LICENSE for license details. */
      /* NOTE(review): presumably the value stored in / compared against
       * BeamformerSharedMemory.version so that every user of the region agrees on
       * its layout; the check itself is not in this file - confirm. */
      2 #define BEAMFORMER_SHARED_MEMORY_VERSION (26UL)
      3 
      /* Forward declaration: this file only refers to a frame through a pointer
       * (BeamformerComputeWorkContext.frame), so the full definition is not needed here. */
      4 typedef struct BeamformerFrame BeamformerFrame;
      5 
      /* Discriminator stored in BeamformWork.kind.
       * The enumerator names parallel the *_context members of the union inside
       * BeamformWork. Values are implicit (0, 1, 2, ...) so reordering changes them. */
      6 typedef enum {
      7 	BeamformerWorkKind_Compute,
      8 	BeamformerWorkKind_ComputeIndirect,
      9 	BeamformerWorkKind_CreateFilter,
     10 	BeamformerWorkKind_ExportBuffer,
     11 } BeamformerWorkKind;
     12 
     /* Payload type of BeamformWork.create_filter_context.
      * Both slot indices are stored as u8; the static_asserts inside the struct stop
      * the build if either slot count constant is raised above 255. */
     13 typedef struct {
     14 	BeamformerFilterParameters parameters;
     15 	u8 filter_slot;
     16 	u8 parameter_block;
     17 	static_assert(BeamformerFilterSlots            <= 255, "CreateFilterContext only supports 255 filter slots");
     18 	static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks");
     19 } BeamformerCreateFilterContext;
     20 
     /* Values for BeamformerExportContext.kind. */
     21 typedef enum {
     22 	BeamformerExportKind_BeamformedData,
     23 	BeamformerExportKind_Stats,
     24 } BeamformerExportKind;
     25 
     /* Payload type of BeamformWork.export_context. */
     26 typedef struct {
     27 	BeamformerExportKind kind;
        	/* NOTE(review): units are not visible in this file - presumably bytes; confirm */
     28 	u32 size;
     29 } BeamformerExportContext;
     30 
     /* X-macro list of the named locks. Each X(name) entry becomes the enumerator
      * BeamformerSharedMemoryLockKind_<name>; the trailing _Count enumerator is the
      * number of named locks. _Count sizes BeamformerSharedMemory.locks (together with
      * BeamformerMaxParameterBlockSlots) and is the base index of the per parameter
      * block locks (see beamformer_parameter_block_lock()). */
     31 #define BEAMFORMER_SHARED_MEMORY_LOCKS \
     32 	X(ScratchSpace)    \
     33 	X(UploadRF)        \
     34 	X(ExportSync)      \
     35 	X(DispatchCompute)
     36 
     37 #define X(name) BeamformerSharedMemoryLockKind_##name,
     38 typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind;
     39 #undef X
     40 
     /* Payload type of BeamformWork.compute_context. */
     41 typedef struct {
     42 	BeamformerFrame *frame;
        	/* NOTE(review): presumably an index usable with beamformer_parameter_block() - confirm */
     43 	u32              parameter_block;
     44 } BeamformerComputeWorkContext;
     45 
     /* Payload type of BeamformWork.compute_indirect_context. */
     46 typedef struct {
     47 	BeamformerViewPlaneTag view_plane;
        	/* NOTE(review): presumably an index usable with beamformer_parameter_block() - confirm */
     48 	u32                    parameter_block;
     49 } BeamformerComputeIndirectWorkContext;
     50 
     51 /* NOTE: discriminated union; `kind` selects which member of the anonymous union is valid.
         * This is the element type of BeamformWorkQueue.work_items. */
     52 typedef struct {
     53 	BeamformWorkKind kind;
        	/* NOTE(review): how `lock` is consumed is not visible in this file - presumably
        	 * the lock associated with this work item; confirm with the queue consumer */
     54 	BeamformerSharedMemoryLockKind lock;
     55 	union {
     56 		void                                 *generic;
     57 		BeamformerComputeWorkContext          compute_context;
     58 		BeamformerComputeIndirectWorkContext  compute_indirect_context;
     59 		BeamformerCreateFilterContext         create_filter_context;
     60 		BeamformerExportContext               export_context;
        		/* NOTE(review): no BeamformerWorkKind enumerator visible in this file
        		 * corresponds to this member */
     61 		BeamformerShaderKind                  reload_shader;
     62 	};
     63 } BeamformWork;
     64 
     /* Fixed capacity ring buffer of work items.
      * The beamform_work_queue_*() functions read and update both indices through the
      * single 64 bit `queue` word: the low 32 bits are the write index and the high
      * 32 bits are the read index. The {widx, ridx} view names the same two halves
      * (widx is the low half on a little-endian target).
      * beamform_work_queue_push() refuses to hand out a slot when the next write index
      * would equal the read index, so at most countof(work_items) - 1 items are queued. */
     65 typedef struct {
     66 	union {
     67 		u64 queue;
     68 		struct {u32 widx, ridx;};
     69 	};
        	/* NOTE: the count must stay a power of 2; push/pop static_assert this */
     70 	BeamformWork work_items[1 << 6];
     71 } BeamformWorkQueue;
     72 
     /* Bit flag enum generated from BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST (defined
      * outside this file): every X(name, id) entry becomes
      * BeamformerLiveImagingDirtyFlags_<name> with the value (1 << id). */
     73 #define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id),
     74 typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags;
     75 #undef X
     76 
     /* X-macro list of the separately updatable regions of a BeamformerParameterBlock.
      * Each entry is X(region name, name of the matching BeamformerParameterBlock field).
      * It generates the BeamformerParameterBlockRegions enum and the
      * BeamformerParameterBlockRegionOffsets table.
      * NOTE: the last entry ends in a line continuation, so the empty line that follows
      * it is what terminates the macro - do not remove it. */
     77 #define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \
     78 	X(ComputePipeline,             pipeline)        \
     79 	X(ChannelMapping,              channel_mapping) \
     80 	X(FocalVectors,                focal_vectors)   \
     81 	X(Parameters,                  parameters)      \
     82 	X(SparseElements,              sparse_elements) \
     83 	X(TransmitReceiveOrientations, transmit_receive_orientations) \
     84 
     /* The region list followed by NotifyUI, which has no field argument. Used to
      * generate the BeamformerParameterRegionFlags enum. The empty line after the last
      * entry terminates this macro as well. */
     85 #define BEAMFORMER_PARAMETER_BLOCK_REGION_FLAG_LIST \
     86 	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \
     87 	X(NotifyUI) \
     88 
     /* One enumerator per entry of BEAMFORMER_PARAMETER_BLOCK_REGION_LIST, plus _Count.
      * The values index BeamformerParameterBlockRegionOffsets and are used as bit
      * positions in BeamformerParameterBlock.region_update_flags
      * (see mark_parameter_block_region_dirty()). */
     89 typedef enum {
     90 	#define X(k, ...) BeamformerParameterBlockRegion_##k,
     91 	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
     92 	#undef X
     93 	BeamformerParameterBlockRegion_Count
     94 } BeamformerParameterBlockRegions;
     95 
     /* One enumerator per entry of BEAMFORMER_PARAMETER_BLOCK_REGION_FLAG_LIST (the
      * regions followed by NotifyUI), plus _Count. BeamformerParameterBlock
      * static_asserts that _Count is at most 32. */
     96 typedef enum {
     97 	#define X(k, ...) BeamformerParameterRegionFlag_##k,
     98 	BEAMFORMER_PARAMETER_BLOCK_REGION_FLAG_LIST
     99 	#undef X
    100 	BeamformerParameterRegionFlag_Count,
    101 } BeamformerParameterRegionFlags;
    102 
    /* Per stage parameter stored next to each shader in BeamformerComputePipeline.
     * Currently a union with a single member. */
    103 typedef union {
    104 	u8 filter_slot;
    105 } BeamformerShaderParameters;
    106 
    /* Type of BeamformerParameterBlock.pipeline: two parallel arrays with room for
     * BeamformerMaxComputeShaderStages stages, plus a count and a data kind. */
    107 typedef struct {
    108 	BeamformerShaderKind       shaders[BeamformerMaxComputeShaderStages];
        	/* NOTE(review): presumably parameters[i] belongs to shaders[i] - confirm */
    109 	BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages];
        	/* NOTE(review): presumably the number of valid entries in the arrays above - confirm */
    110 	u32                        shader_count;
    111 	BeamformerDataKind         data_kind;
    112 } BeamformerComputePipeline;
    113 
    114 typedef struct {
    115 	alignas(16) union {
    116 		BeamformerParameters parameters;
    117 		struct {
    118 			BeamformerParametersHead  parameters_head;
    119 			BeamformerUIParameters    parameters_ui;
    120 			BeamformerParametersExtra parameters_extra;
    121 		};
    122 	};
    123 
    124 	/* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */
    125 	u32 region_update_flags;
    126 	static_assert(BeamformerParameterRegionFlag_Count <= 32, "");
    127 
    128 	BeamformerComputePipeline pipeline;
    129 
    130 	alignas(16) i16 channel_mapping[BeamformerMaxChannelCount];
    131 	alignas(16) i16 sparse_elements[BeamformerMaxChannelCount];
    132 	alignas(16) u8  transmit_receive_orientations[BeamformerMaxChannelCount];
    133 	/* NOTE(rnp): interleaved transmit angle, focal depth pairs */
    134 	alignas(16) v2  focal_vectors[BeamformerMaxChannelCount];
    135 } BeamformerParameterBlock;
    136 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0,
    137               "sizeof(BeamformerParametersBlock) must be a multiple of its alignment");
    138 
    /* Byte offset of each region's field inside BeamformerParameterBlock, indexed by
     * BeamformerParameterBlockRegions and generated from
     * BEAMFORMER_PARAMETER_BLOCK_REGION_LIST.
     * NOTE(review): entries are u16, so every field offset must stay below 65536. */
    139 #define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field),
    140 read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = {
    141 	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
    142 };
    143 #undef X
    144 
    /* Header at the start of the shared memory region.
     * The parameter blocks start immediately after this struct
     * (see beamformer_parameter_block()) and the scratch space follows them
     * (see beamformer_shared_memory_scratch_arena()). */
    145 typedef struct {
    146 	u32 version;
    147 
    148 	/* NOTE(rnp): causes future library calls to fail.
    149 	 * see note in beamformer_invalidate_shared_memory() */
    150 	b32 invalid;
    151 
    152 	/* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of
    153 	 * the lock without leaving userspace. */
        	/* NOTE: indices below BeamformerSharedMemoryLockKind_Count are the named locks;
        	 * index BeamformerSharedMemoryLockKind_Count + n is the lock for parameter block n
        	 * (see beamformer_parameter_block_lock()) */
    154 	i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots];
    155 
    156 	/* NOTE(rnp): total number of parameter block regions the client has requested.
    157 	 * used to calculate offset to scratch space and to track number of allocated
    158 	 * semaphores on w32. Defaults to 1 but can be changed at runtime */
    159 	u32 reserved_parameter_blocks;
    160 
    161 	/* TODO(rnp): this is really sucky. we need a better way to communicate this */
    162 	u64 rf_block_rf_size;
    163 
    164 	BeamformerLiveImagingParameters live_imaging_parameters;
    165 	BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags;
    166 
    167 	BeamformWorkQueue external_work_queue;
    168 } BeamformerSharedMemory;
    169 
    170 function BeamformWork *
    171 beamform_work_queue_pop(BeamformWorkQueue *q)
    172 {
    173 	BeamformWork *result = 0;
    174 
    175 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    176 	u64 val  = atomic_load_u64(&q->queue);
    177 	u64 mask = countof(q->work_items) - 1;
    178 	u64 widx = val       & mask;
    179 	u64 ridx = val >> 32 & mask;
    180 
    181 	if (ridx != widx)
    182 		result = q->work_items + ridx;
    183 
    184 	return result;
    185 }
    186 
    187 function void
    188 beamform_work_queue_pop_commit(BeamformWorkQueue *q)
    189 {
    190 	atomic_add_u64(&q->queue, 0x100000000ULL);
    191 }
    192 
    193 function BeamformWork *
    194 beamform_work_queue_push(BeamformWorkQueue *q)
    195 {
    196 	BeamformWork *result = 0;
    197 
    198 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    199 	u64 val  = atomic_load_u64(&q->queue);
    200 	u64 mask = countof(q->work_items) - 1;
    201 	u64 widx = val        & mask;
    202 	u64 ridx = val >> 32  & mask;
    203 	u64 next = (widx + 1) & mask;
    204 
    205 	if (val & 0x80000000)
    206 		atomic_and_u64(&q->queue, ~0x80000000);
    207 
    208 	if (next != ridx) {
    209 		result = q->work_items + widx;
    210 		zero_struct(result);
    211 	}
    212 
    213 	return result;
    214 }
    215 
    216 function void
    217 beamform_work_queue_push_commit(BeamformWorkQueue *q)
    218 {
    219 	atomic_add_u64(&q->queue, 1);
    220 }
    221 
    222 #if OS_WINDOWS
    223 // NOTE(rnp): junk needed on w32 to watch a value across processes while yielding
    224 // control back to the kernel. There are user level CPU instructions that allow
    225 // this so why w32 can't do it in kernel mode sounds like shitty design to me.
        // NOTE: one semaphore per entry of BeamformerSharedMemory.locks; the array is sized
        // from that member and is indexed with the same lock index in take/release below.
    226 DEBUG_IMPORT OSW32Semaphore os_w32_shared_memory_semaphores[countof(((BeamformerSharedMemory *)0)->locks)];
    227 #endif
    228 
    229 function b32
    230 beamformer_shared_memory_take_lock(BeamformerSharedMemory *sm, i32 lock, u32 timeout_ms)
    231 {
    232 #if OS_WINDOWS
    233 	b32 result = os_w32_semaphore_wait(os_w32_shared_memory_semaphores[lock], timeout_ms);
    234 	if (result) atomic_store_u32(sm->locks + lock, 1);
    235 #else
    236 	b32 result = take_lock(sm->locks + lock, timeout_ms);
    237 #endif
    238 	return result;
    239 }
    240 
    241 function void
    242 beamformer_shared_memory_release_lock(BeamformerSharedMemory *sm, i32 lock)
    243 {
    244 	release_lock(sm->locks + lock);
    245 #if OS_WINDOWS
    246 	os_w32_semaphore_release(os_w32_shared_memory_semaphores[lock], 1);
    247 #endif
    248 }
    249 
    250 function BeamformerParameterBlock *
    251 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block)
    252 {
    253 	assert(sm->reserved_parameter_blocks >= block);
    254 	BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result));
    255 	return result;
    256 }
    257 
    258 function b32
    259 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block)
    260 {
    261 	b32 result = beamformer_parameter_block(sm, block)->region_update_flags != 0;
    262 	return result;
    263 }
    264 
    265 function BeamformerParameterBlock *
    266 beamformer_parameter_block_lock(BeamformerSharedMemory *sm, u32 block, i32 timeout_ms)
    267 {
    268 	assert(block < BeamformerMaxParameterBlockSlots);
    269 	BeamformerParameterBlock *result = 0;
    270 	if (beamformer_shared_memory_take_lock(sm, BeamformerSharedMemoryLockKind_Count + block, (u32)timeout_ms))
    271 		result = beamformer_parameter_block(sm, block);
    272 	return result;
    273 }
    274 
    275 function void
    276 beamformer_parameter_block_unlock(BeamformerSharedMemory *sm, u32 block)
    277 {
    278 	assert(block < BeamformerMaxParameterBlockSlots);
    279 	beamformer_shared_memory_release_lock(sm, BeamformerSharedMemoryLockKind_Count + block);
    280 }
    281 
    282 function Arena
    283 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm, i64 shared_memory_size)
    284 {
    285 	assert(sm->reserved_parameter_blocks > 0);
    286 	BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks);
    287 	Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + shared_memory_size};
    288 	result.beg = arena_aligned_start(result, KB(4));
    289 	return result;
    290 }
    291 
    292 function void
    293 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region)
    294 {
    295 	BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block);
    296 	atomic_or_u32(&pb->region_update_flags, 1u << region);
    297 }
    298 
    /* Release the named lock `lock` through beamformer_shared_memory_release_lock().
     * NOTE(review): DEBUG_DECL is defined outside this file; presumably it keeps its
     * argument only in debug builds, so the release is guarded by the
     * `sm->locks[lock]` check there and is unconditional otherwise - confirm. */
    299 function void
    300 post_sync_barrier(BeamformerSharedMemory *sm, BeamformerSharedMemoryLockKind lock)
    301 {
    302 	/* NOTE(rnp): debug: here it is not a bug to release the lock if it
    303 	 * isn't held but elsewhere it is */
    304 	DEBUG_DECL(if (sm->locks[lock])) {
    305 		beamformer_shared_memory_release_lock(sm, lock);
    306 	}
    307 }
    307 }