ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

beamformer_shared_memory.c (8923B)


      1 /* See LICENSE for license details. */
/* NOTE: layout version of the shared memory region described in this file.
 * NOTE(review): presumably compared against BeamformerSharedMemory.version by the
 * users of the region - confirm at the use sites */
#define BEAMFORMER_SHARED_MEMORY_VERSION (23UL)

/* NOTE: only referenced through a pointer in this file (BeamformerComputeWorkContext) */
typedef struct BeamformerFrame BeamformerFrame;
      5 
/* NOTE: tag stored in BeamformWork.kind. Values are implicit and sequential starting at 0;
 * BeamformWork items live in the shared memory work queue so reordering these changes
 * what is stored there. */
typedef enum {
	BeamformerWorkKind_Compute,
	BeamformerWorkKind_ComputeIndirect,
	BeamformerWorkKind_CreateFilter,
	BeamformerWorkKind_ReloadShader,
	BeamformerWorkKind_ExportBuffer,
	BeamformerWorkKind_UploadBuffer,
} BeamformerWorkKind;
     14 
/* NOTE: payload stored in BeamformWork.create_filter_context.
 * The slot and block indices are stored as u8; the static_asserts fail the build if the
 * configured limits grow past what the assertions allow. */
typedef struct {
	BeamformerFilterParameters parameters;
	u8 filter_slot;
	u8 parameter_block;
	static_assert(BeamformerFilterSlots            <= 255, "CreateFilterContext only supports 255 filter slots");
	static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks");
} BeamformerCreateFilterContext;
     22 
/* NOTE: selects what an export request refers to; stored in BeamformerExportContext.kind */
typedef enum {
	BeamformerExportKind_BeamformedData,
	BeamformerExportKind_Stats,
} BeamformerExportKind;
     27 
/* NOTE: payload stored in BeamformWork.export_context */
typedef struct {
	BeamformerExportKind kind;
	/* NOTE(review): presumably the size in bytes of the export - confirm against the users */
	u32 size;
} BeamformerExportContext;
     32 
/* NOTE: X-macro list of the named shared memory locks. Each X(name) entry expands to
 * BeamformerSharedMemoryLockKind_<name> in the enum below. */
#define BEAMFORMER_SHARED_MEMORY_LOCKS \
	X(ScratchSpace)    \
	X(UploadRF)        \
	X(ExportSync)      \
	X(DispatchCompute)

/* NOTE: BeamformerSharedMemoryLockKind_Count is the number of named locks. The per
 * parameter block locks are indexed starting at this value
 * (see beamformer_parameter_block_lock()). */
#define X(name) BeamformerSharedMemoryLockKind_##name,
typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind;
#undef X
     42 
/* NOTE: payload stored in BeamformWork.compute_context */
typedef struct {
	BeamformerFrame *frame;
	/* NOTE(review): presumably an index usable with beamformer_parameter_block() - confirm */
	u32              parameter_block;
} BeamformerComputeWorkContext;
     47 
/* NOTE: payload stored in BeamformWork.compute_indirect_context */
typedef struct {
	BeamformerViewPlaneTag view_plane;
	/* NOTE(review): presumably an index usable with beamformer_parameter_block() - confirm */
	u32                    parameter_block;
} BeamformerComputeIndirectWorkContext;
     52 
/* NOTE: discriminated union; `kind` is the tag.
 * NOTE(review): which union member belongs to which BeamformerWorkKind is not visible
 * in this file (the names suggest the pairing) - confirm against the code that
 * consumes the queue. The same applies to how `lock` is used. */
typedef struct {
	BeamformWorkKind kind;
	BeamformerSharedMemoryLockKind lock;
	union {
		void                                 *generic;
		BeamformerComputeWorkContext          compute_context;
		BeamformerComputeIndirectWorkContext  compute_indirect_context;
		BeamformerCreateFilterContext         create_filter_context;
		BeamformerExportContext               export_context;
		BeamformerShaderKind                  reload_shader;
	};
} BeamformWork;
     66 
/* NOTE: fixed capacity ring buffer of work items.
 *
 * Both indices are packed into the single u64 `queue` so that the queue functions can
 * read them with one atomic load: the write index is taken from the low 32 bits and the
 * read index from the high 32 bits. The widx/ridx overlay only matches that packing when
 * the first struct member occupies the low bytes of the u64 (little endian).
 *
 * The capacity (1 << 6 = 64 items) must be a power of 2; this is static_assert'd in the
 * push/pop functions. beamform_work_queue_push() refuses to hand out a slot when the next
 * write index would equal the read index. */
typedef struct {
	union {
		u64 queue;
		struct {u32 widx, ridx;};
	};
	BeamformWork work_items[1 << 6];
} BeamformWorkQueue;
     74 
/* NOTE: total size of the shared memory region; also used as the end of the scratch
 * arena in beamformer_shared_memory_scratch_arena() */
#define BEAMFORMER_SHARED_MEMORY_SIZE             (GB(2))
/* NOTE: region size minus the BeamformerSharedMemory header and a single parameter block.
 * The types are defined further down; this is fine since the macro is only expanded at
 * its use sites. */
#define BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - \
                                                   sizeof(BeamformerSharedMemory) - \
                                                   sizeof(BeamformerParameterBlock))
     79 
/* NOTE: every X(name, id) entry of BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST (defined
 * outside of this file) becomes the single bit flag (1 << id) */
#define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id),
typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags;
#undef X
     83 
/* NOTE: X-macro list of the separately trackable regions of a BeamformerParameterBlock.
 * X(Name, field): `Name` becomes BeamformerParameterBlockRegion_<Name> and `field` is
 * the BeamformerParameterBlock member used to compute the region's offset in
 * BeamformerParameterBlockRegionOffsets. */
#define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \
	X(ComputePipeline,             pipeline)        \
	X(ChannelMapping,              channel_mapping) \
	X(FocalVectors,                focal_vectors)   \
	X(Parameters,                  parameters)      \
	X(SparseElements,              sparse_elements) \
	X(TransmitReceiveOrientations, transmit_receive_orientations)

/* NOTE: the enum value is used as a bit index into BeamformerParameterBlock.dirty_regions
 * (see mark_parameter_block_region_dirty()) */
typedef enum {
	#define X(k, ...) BeamformerParameterBlockRegion_##k,
	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
	#undef X
	BeamformerParameterBlockRegion_Count
} BeamformerParameterBlockRegions;
     98 
/* NOTE: per stage shader parameters (one per entry of BeamformerComputePipeline.shaders).
 * Currently a union with a single member. */
typedef union {
	u8 filter_slot;
} BeamformerShaderParameters;
    102 
/* NOTE: description of a compute pipeline; stored in BeamformerParameterBlock.pipeline.
 * `shaders` and `parameters` are parallel arrays with room for
 * BeamformerMaxComputeShaderStages stages. */
typedef struct {
	BeamformerShaderKind       shaders[BeamformerMaxComputeShaderStages];
	BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages];
	/* NOTE(review): presumably the number of valid entries in the arrays above - confirm */
	u32                        shader_count;
	BeamformerDataKind         data_kind;
} BeamformerComputePipeline;
    109 
    110 typedef struct {
    111 	alignas(16) union {
    112 		BeamformerParameters parameters;
    113 		struct {
    114 			BeamformerParametersHead parameters_head;
    115 			BeamformerUIParameters   parameters_ui;
    116 		};
    117 	};
    118 
    119 	/* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */
    120 	u32 dirty_regions;
    121 	static_assert(BeamformerParameterBlockRegion_Count <= 32, "only 32 parameter block regions supported");
    122 
    123 	BeamformerComputePipeline pipeline;
    124 
    125 	alignas(16) i16 channel_mapping[BeamformerMaxChannelCount];
    126 	alignas(16) i16 sparse_elements[BeamformerMaxChannelCount];
    127 	alignas(16) u8  transmit_receive_orientations[BeamformerMaxChannelCount];
    128 	/* NOTE(rnp): interleaved transmit angle, focal depth pairs */
    129 	alignas(16) v2  focal_vectors[BeamformerMaxChannelCount];
    130 } BeamformerParameterBlock;
    131 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0,
    132               "sizeof(BeamformerParametersBlock) must be a multiple of its alignment");
    133 
/* NOTE: byte offset of each region's member within BeamformerParameterBlock, indexed by
 * BeamformerParameterBlockRegions.
 * NOTE(review): the offsets are stored as u16 so this relies on every offset being
 * smaller than 64K - confirm if BeamformerParameterBlock grows */
#define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field),
read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = {
	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
};
#undef X
    139 
/* NOTE: header located at the very start of the shared memory region. The parameter
 * blocks follow directly after it (see beamformer_parameter_block()) and the scratch
 * space follows those (see beamformer_shared_memory_scratch_arena()). */
typedef struct {
	u32 version;

	/* NOTE(rnp): causes future library calls to fail.
	 * see note in beamformer_invalidate_shared_memory() */
	b32 invalid;

	/* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of
	 * the lock without leaving userspace. */
	/* NOTE: the named locks (BeamformerSharedMemoryLockKind) come first, followed by one
	 * lock per parameter block slot */
	i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots];

	/* NOTE(rnp): total number of parameter block regions the client has requested.
	 * used to calculate offset to scratch space and to track number of allocated
	 * semaphores on w32. Defaults to 1 but can be changed at runtime */
	u32 reserved_parameter_blocks;

	/* TODO(rnp): this is really sucky. we need a better way to communicate this */
	u64 rf_block_rf_size;

	BeamformerLiveImagingParameters live_imaging_parameters;
	BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags;

	BeamformWorkQueue external_work_queue;
} BeamformerSharedMemory;
    164 
    165 function BeamformWork *
    166 beamform_work_queue_pop(BeamformWorkQueue *q)
    167 {
    168 	BeamformWork *result = 0;
    169 
    170 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    171 	u64 val  = atomic_load_u64(&q->queue);
    172 	u64 mask = countof(q->work_items) - 1;
    173 	u64 widx = val       & mask;
    174 	u64 ridx = val >> 32 & mask;
    175 
    176 	if (ridx != widx)
    177 		result = q->work_items + ridx;
    178 
    179 	return result;
    180 }
    181 
    182 function void
    183 beamform_work_queue_pop_commit(BeamformWorkQueue *q)
    184 {
    185 	atomic_add_u64(&q->queue, 0x100000000ULL);
    186 }
    187 
    188 function BeamformWork *
    189 beamform_work_queue_push(BeamformWorkQueue *q)
    190 {
    191 	BeamformWork *result = 0;
    192 
    193 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    194 	u64 val  = atomic_load_u64(&q->queue);
    195 	u64 mask = countof(q->work_items) - 1;
    196 	u64 widx = val        & mask;
    197 	u64 ridx = val >> 32  & mask;
    198 	u64 next = (widx + 1) & mask;
    199 
    200 	if (val & 0x80000000)
    201 		atomic_and_u64(&q->queue, ~0x80000000);
    202 
    203 	if (next != ridx) {
    204 		result = q->work_items + widx;
    205 		zero_struct(result);
    206 	}
    207 
    208 	return result;
    209 }
    210 
    211 function void
    212 beamform_work_queue_push_commit(BeamformWorkQueue *q)
    213 {
    214 	atomic_add_u64(&q->queue, 1);
    215 }
    216 
    217 function BeamformerParameterBlock *
    218 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block)
    219 {
    220 	assert(sm->reserved_parameter_blocks >= block);
    221 	BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result));
    222 	return result;
    223 }
    224 
    225 function b32
    226 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block)
    227 {
    228 	b32 result = beamformer_parameter_block(sm, block)->dirty_regions != 0;
    229 	return result;
    230 }
    231 
    232 function BeamformerParameterBlock *
    233 beamformer_parameter_block_lock(SharedMemoryRegion *sm, u32 block, i32 timeout_ms)
    234 {
    235 	assert(block < BeamformerMaxParameterBlockSlots);
    236 	BeamformerSharedMemory   *b      = sm->region;
    237 	BeamformerParameterBlock *result = 0;
    238 	if (os_shared_memory_region_lock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block, (u32)timeout_ms))
    239 		result = beamformer_parameter_block(sm->region, block);
    240 	return result;
    241 }
    242 
    243 function void
    244 beamformer_parameter_block_unlock(SharedMemoryRegion *sm, u32 block)
    245 {
    246 	assert(block < BeamformerMaxParameterBlockSlots);
    247 	BeamformerSharedMemory *b = sm->region;
    248 	os_shared_memory_region_unlock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block);
    249 }
    250 
    251 function Arena
    252 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm)
    253 {
    254 	assert(sm->reserved_parameter_blocks > 0);
    255 	BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks);
    256 	Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + BEAMFORMER_SHARED_MEMORY_SIZE};
    257 	result.beg = arena_aligned_start(result, KB(4));
    258 	return result;
    259 }
    260 
    261 function void
    262 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region)
    263 {
    264 	BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block);
    265 	atomic_or_u32(&pb->dirty_regions, 1u << region);
    266 }
    267 
/* Releases `lock` from the `locks` array through os_shared_memory_region_unlock().
 *
 * NOTE(review): the "is it held" check is wrapped in DEBUG_DECL() so presumably it only
 * exists in debug builds and the unlock is unconditional otherwise - confirm against the
 * macro definition. */
function void
post_sync_barrier(SharedMemoryRegion *sm, BeamformerSharedMemoryLockKind lock, i32 *locks)
{
	/* NOTE(rnp): debug: here it is not a bug to release the lock if it
	 * isn't held but elsewhere it is */
	DEBUG_DECL(if (locks[lock])) {
		os_shared_memory_region_unlock(sm, locks, (i32)lock);
	}
}