ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

beamformer_shared_memory.c (9711B)


      1 /* See LICENSE for license details. */
/* NOTE: version number of the shared memory interface described by this file.
 * BeamformerSharedMemory carries a matching `version` field; presumably the two are
 * compared so that a mismatched client/beamformer pair can be detected - confirm
 * at the call sites. */
#define BEAMFORMER_SHARED_MEMORY_VERSION (23UL)

/* NOTE: forward declaration; the visible part of this file only refers to frames by pointer */
typedef struct BeamformerFrame BeamformerFrame;
      5 
/* NOTE: tag stored in BeamformWork.kind. Enumerators are implicitly numbered
 * from 0 in declaration order. */
typedef enum {
	BeamformerWorkKind_Compute,
	BeamformerWorkKind_ComputeIndirect,
	BeamformerWorkKind_CreateFilter,
	BeamformerWorkKind_ExportBuffer,
	BeamformerWorkKind_UploadBuffer,
} BeamformerWorkKind;
     13 
/* NOTE: payload carried by a BeamformWork item (create_filter_context member).
 * filter_slot and parameter_block are stored as u8; the static_asserts below
 * reject builds where the slot counts exceed 255. */
typedef struct {
	BeamformerFilterParameters parameters;
	u8 filter_slot;
	u8 parameter_block;
	static_assert(BeamformerFilterSlots            <= 255, "CreateFilterContext only supports 255 filter slots");
	static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks");
} BeamformerCreateFilterContext;
     21 
/* NOTE: selects what an export request refers to; stored in BeamformerExportContext.kind */
typedef enum {
	BeamformerExportKind_BeamformedData,
	BeamformerExportKind_Stats,
} BeamformerExportKind;
     26 
/* NOTE: payload carried by a BeamformWork item (export_context member) */
typedef struct {
	BeamformerExportKind kind;
	/* NOTE(review): presumably the size in bytes of the data to export - confirm with the producer */
	u32 size;
} BeamformerExportContext;
     31 
/* NOTE: X-macro list of the named shared memory locks. Each X(name) entry becomes
 * one enumerator below; add new locks here. */
#define BEAMFORMER_SHARED_MEMORY_LOCKS \
	X(ScratchSpace)    \
	X(UploadRF)        \
	X(ExportSync)      \
	X(DispatchCompute)

/* NOTE: expands the list into BeamformerSharedMemoryLockKind_<name> enumerators numbered
 * from 0; the trailing _Count enumerator therefore equals the number of named locks. */
#define X(name) BeamformerSharedMemoryLockKind_##name,
typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind;
#undef X
     41 
/* NOTE: payload carried by a BeamformWork item (compute_context member) */
typedef struct {
	BeamformerFrame *frame;
	/* NOTE(review): presumably an index usable with beamformer_parameter_block() - confirm */
	u32              parameter_block;
} BeamformerComputeWorkContext;
     46 
/* NOTE: payload carried by a BeamformWork item (compute_indirect_context member);
 * unlike BeamformerComputeWorkContext it names a view plane instead of a frame pointer */
typedef struct {
	BeamformerViewPlaneTag view_plane;
	/* NOTE(review): presumably an index usable with beamformer_parameter_block() - confirm */
	u32                    parameter_block;
} BeamformerComputeIndirectWorkContext;
     51 
/* NOTE: discriminated union based on type
 * `kind` is the discriminant for the anonymous union. `lock` names one of the
 * shared memory locks; NOTE(review): presumably the lock associated with this
 * work item that gets released once it has been handled - confirm with the consumer. */
typedef struct {
	BeamformWorkKind kind;
	BeamformerSharedMemoryLockKind lock;
	union {
		void                                 *generic;
		BeamformerComputeWorkContext          compute_context;
		BeamformerComputeIndirectWorkContext  compute_indirect_context;
		BeamformerCreateFilterContext         create_filter_context;
		BeamformerExportContext               export_context;
		BeamformerShaderKind                  reload_shader;
	};
} BeamformWork;
     65 
/* NOTE: fixed capacity ring of work items. Both indices live in the single 64 bit
 * `queue` word so they can be read with one atomic load: the queue functions in
 * this file take the write index from the low 32 bits and the read index from the
 * high 32 bits. The anonymous {widx, ridx} struct aliases the same storage; it only
 * matches that split on a little endian target. */
typedef struct {
	union {
		u64 queue;
		struct {u32 widx, ridx;};
	};
	/* NOTE: capacity (64) must stay a power of 2, the queue functions static_assert this */
	BeamformWork work_items[1 << 6];
} BeamformWorkQueue;
     73 
/* NOTE: total size of the shared memory region; also used as the end of the scratch
 * arena in beamformer_shared_memory_scratch_arena() */
#define BEAMFORMER_SHARED_MEMORY_SIZE             (GB(2))
/* NOTE: region size minus the BeamformerSharedMemory header and a single parameter block.
 * The expression uses sizeof() on types that are declared further down, so it is
 * only usable after those declarations. */
#define BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - \
                                                   sizeof(BeamformerSharedMemory) - \
                                                   sizeof(BeamformerParameterBlock))
     78 
/* NOTE: expands BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST (defined outside this section)
 * into single bit flags: each X(name, id) entry becomes
 * BeamformerLiveImagingDirtyFlags_<name> = (1 << id). The shift is performed on a
 * signed int so ids are expected to stay below 31. */
#define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id),
typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags;
#undef X
     82 
/* NOTE: X-macro list of the independently updatable regions of a BeamformerParameterBlock.
 * First argument: region name (becomes an enumerator), second argument: the
 * BeamformerParameterBlock field where the region starts (used for the offset table). */
#define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \
	X(ComputePipeline,             pipeline)        \
	X(ChannelMapping,              channel_mapping) \
	X(FocalVectors,                focal_vectors)   \
	X(Parameters,                  parameters)      \
	X(SparseElements,              sparse_elements) \
	X(TransmitReceiveOrientations, transmit_receive_orientations)

/* NOTE: region ids numbered from 0 in list order; a region's id is also its bit
 * position in BeamformerParameterBlock.dirty_regions */
typedef enum {
	#define X(k, ...) BeamformerParameterBlockRegion_##k,
	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
	#undef X
	BeamformerParameterBlockRegion_Count
} BeamformerParameterBlockRegions;
     97 
/* NOTE: per shader stage parameters stored alongside each entry of
 * BeamformerComputePipeline.shaders. Declared as a union; it currently has a
 * single member. */
typedef union {
	u8 filter_slot;
} BeamformerShaderParameters;
    101 
/* NOTE: description of a compute pipeline as parallel arrays: parameters[i] belongs
 * to shaders[i]. NOTE(review): presumably only the first shader_count entries are
 * meaningful - confirm with the code that fills this in. */
typedef struct {
	BeamformerShaderKind       shaders[BeamformerMaxComputeShaderStages];
	BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages];
	u32                        shader_count;
	BeamformerDataKind         data_kind;
} BeamformerComputePipeline;
    108 
    109 typedef struct {
    110 	alignas(16) union {
    111 		BeamformerParameters parameters;
    112 		struct {
    113 			BeamformerParametersHead parameters_head;
    114 			BeamformerUIParameters   parameters_ui;
    115 		};
    116 	};
    117 
    118 	/* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */
    119 	u32 dirty_regions;
    120 	static_assert(BeamformerParameterBlockRegion_Count <= 32, "only 32 parameter block regions supported");
    121 
    122 	BeamformerComputePipeline pipeline;
    123 
    124 	alignas(16) i16 channel_mapping[BeamformerMaxChannelCount];
    125 	alignas(16) i16 sparse_elements[BeamformerMaxChannelCount];
    126 	alignas(16) u8  transmit_receive_orientations[BeamformerMaxChannelCount];
    127 	/* NOTE(rnp): interleaved transmit angle, focal depth pairs */
    128 	alignas(16) v2  focal_vectors[BeamformerMaxChannelCount];
    129 } BeamformerParameterBlock;
    130 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0,
    131               "sizeof(BeamformerParametersBlock) must be a multiple of its alignment");
    132 
/* NOTE: byte offset of each region inside a BeamformerParameterBlock, indexed by
 * BeamformerParameterBlockRegions. Offsets are stored as u16, so every listed field
 * must start within the first 64KB of the block. */
#define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field),
read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = {
	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
};
#undef X
    138 
/* NOTE: header placed at the start of the shared memory region. The parameter blocks
 * follow immediately after it (see beamformer_parameter_block()) and the scratch
 * space comes after those (see beamformer_shared_memory_scratch_arena()). */
typedef struct {
	u32 version;

	/* NOTE(rnp): causes future library calls to fail.
	 * see note in beamformer_invalidate_shared_memory() */
	b32 invalid;

	/* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of
	 * the lock without leaving userspace. */
	/* NOTE: the first BeamformerSharedMemoryLockKind_Count entries are the named locks,
	 * the remaining entries are one lock per parameter block slot */
	i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots];

	/* NOTE(rnp): total number of parameter block regions the client has requested.
	 * used to calculate offset to scratch space and to track number of allocated
	 * semaphores on w32. Defaults to 1 but can be changed at runtime */
	u32 reserved_parameter_blocks;

	/* TODO(rnp): this is really sucky. we need a better way to communicate this */
	u64 rf_block_rf_size;

	BeamformerLiveImagingParameters live_imaging_parameters;
	BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags;

	BeamformWorkQueue external_work_queue;
} BeamformerSharedMemory;
    163 
    164 function BeamformWork *
    165 beamform_work_queue_pop(BeamformWorkQueue *q)
    166 {
    167 	BeamformWork *result = 0;
    168 
    169 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    170 	u64 val  = atomic_load_u64(&q->queue);
    171 	u64 mask = countof(q->work_items) - 1;
    172 	u64 widx = val       & mask;
    173 	u64 ridx = val >> 32 & mask;
    174 
    175 	if (ridx != widx)
    176 		result = q->work_items + ridx;
    177 
    178 	return result;
    179 }
    180 
    181 function void
    182 beamform_work_queue_pop_commit(BeamformWorkQueue *q)
    183 {
    184 	atomic_add_u64(&q->queue, 0x100000000ULL);
    185 }
    186 
    187 function BeamformWork *
    188 beamform_work_queue_push(BeamformWorkQueue *q)
    189 {
    190 	BeamformWork *result = 0;
    191 
    192 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    193 	u64 val  = atomic_load_u64(&q->queue);
    194 	u64 mask = countof(q->work_items) - 1;
    195 	u64 widx = val        & mask;
    196 	u64 ridx = val >> 32  & mask;
    197 	u64 next = (widx + 1) & mask;
    198 
    199 	if (val & 0x80000000)
    200 		atomic_and_u64(&q->queue, ~0x80000000);
    201 
    202 	if (next != ridx) {
    203 		result = q->work_items + widx;
    204 		zero_struct(result);
    205 	}
    206 
    207 	return result;
    208 }
    209 
    210 function void
    211 beamform_work_queue_push_commit(BeamformWorkQueue *q)
    212 {
    213 	atomic_add_u64(&q->queue, 1);
    214 }
    215 
#if OS_WINDOWS
// NOTE(rnp): junk needed on w32 to watch a value across processes while yielding
// control back to the kernel. There are user level CPU instructions that allow
// this so why w32 can't do it in kernel mode sounds like shitty design to me.
// NOTE: one semaphore per entry of BeamformerSharedMemory.locks, indexed the same way.
DEBUG_IMPORT OSW32Semaphore os_w32_shared_memory_semaphores[countof(((BeamformerSharedMemory *)0)->locks)];
#endif
    222 
    223 function b32
    224 beamformer_shared_memory_take_lock(BeamformerSharedMemory *sm, i32 lock, u32 timeout_ms)
    225 {
    226 #if OS_WINDOWS
    227 	b32 result = os_w32_semaphore_wait(os_w32_shared_memory_semaphores[lock], timeout_ms);
    228 	if (result) atomic_store_u32(sm->locks + lock, 1);
    229 #else
    230 	b32 result = take_lock(sm->locks + lock, timeout_ms);
    231 #endif
    232 	return result;
    233 }
    234 
    235 function void
    236 beamformer_shared_memory_release_lock(BeamformerSharedMemory *sm, i32 lock)
    237 {
    238 	release_lock(sm->locks + lock);
    239 #if OS_WINDOWS
    240 	os_w32_semaphore_release(os_w32_shared_memory_semaphores[lock], 1);
    241 #endif
    242 }
    243 
    244 function BeamformerParameterBlock *
    245 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block)
    246 {
    247 	assert(sm->reserved_parameter_blocks >= block);
    248 	BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result));
    249 	return result;
    250 }
    251 
    252 function b32
    253 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block)
    254 {
    255 	b32 result = beamformer_parameter_block(sm, block)->dirty_regions != 0;
    256 	return result;
    257 }
    258 
    259 function BeamformerParameterBlock *
    260 beamformer_parameter_block_lock(BeamformerSharedMemory *sm, u32 block, i32 timeout_ms)
    261 {
    262 	assert(block < BeamformerMaxParameterBlockSlots);
    263 	BeamformerParameterBlock *result = 0;
    264 	if (beamformer_shared_memory_take_lock(sm, BeamformerSharedMemoryLockKind_Count + block, (u32)timeout_ms))
    265 		result = beamformer_parameter_block(sm, block);
    266 	return result;
    267 }
    268 
    269 function void
    270 beamformer_parameter_block_unlock(BeamformerSharedMemory *sm, u32 block)
    271 {
    272 	assert(block < BeamformerMaxParameterBlockSlots);
    273 	beamformer_shared_memory_release_lock(sm, BeamformerSharedMemoryLockKind_Count + block);
    274 }
    275 
    276 function Arena
    277 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm)
    278 {
    279 	assert(sm->reserved_parameter_blocks > 0);
    280 	BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks);
    281 	Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + BEAMFORMER_SHARED_MEMORY_SIZE};
    282 	result.beg = arena_aligned_start(result, KB(4));
    283 	return result;
    284 }
    285 
    286 function void
    287 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region)
    288 {
    289 	BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block);
    290 	atomic_or_u32(&pb->dirty_regions, 1u << region);
    291 }
    292 
/* NOTE: releases shared memory lock `lock`.
 * NOTE(review): DEBUG_DECL is defined elsewhere; presumably it expands to its argument
 * only in debug builds, so that the release is guarded by the lock word there and
 * is unconditional otherwise - confirm against the macro definition. */
function void
post_sync_barrier(BeamformerSharedMemory *sm, BeamformerSharedMemoryLockKind lock)
{
	/* NOTE(rnp): debug: here it is not a bug to release the lock if it
	 * isn't held but elsewhere it is */
	DEBUG_DECL(if (sm->locks[lock])) {
		beamformer_shared_memory_release_lock(sm, lock);
	}
}
    301 }