ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

beamformer_shared_memory.c (9480B)


      1 /* See LICENSE for license details. */
/* NOTE: version stamp for the shared memory layout described in this file.
 * Presumably compared against BeamformerSharedMemory.version by clients and
 * bumped whenever the layout changes - TODO confirm against the library. */
#define BEAMFORMER_SHARED_MEMORY_VERSION (14UL)

/* NOTE: forward declarations; this file only stores pointers to these types */
typedef struct BeamformerFrame     BeamformerFrame;
typedef struct ShaderReloadContext ShaderReloadContext;
      6 
/* NOTE: tag stored in BeamformWork.kind; identifies what a queued work item
 * asks for and therefore which member of its context union is meaningful */
typedef enum {
	BeamformerWorkKind_Compute,
	BeamformerWorkKind_ComputeIndirect,
	BeamformerWorkKind_CreateFilter,
	BeamformerWorkKind_ReloadShader,
	BeamformerWorkKind_ExportBuffer,
	BeamformerWorkKind_UploadBuffer,
} BeamformerWorkKind;
     15 
/* TODO(rnp): this is massively bloating the queue; think of some other
 * way to communicate these to the beamformer */
/* NOTE: parameters for creating a filter. The anonymous union gets one named
 * struct member per entry of BEAMFORMER_FILTER_KIND_LIST (defined outside this
 * file); the remaining fields are shared by every filter kind. */
typedef struct {
	union {
		/* NOTE: each list entry expands to `struct { <fields> ; } <kind>;` */
		#define X(kind, ...) struct {__VA_ARGS__ ;} kind;
		BEAMFORMER_FILTER_KIND_LIST(f32, ;)
		#undef X
	};
	f32 sampling_frequency;
	b16 complex;
} BeamformerFilterParameters;
     27 
/* NOTE: context carried by a BeamformerWorkKind_CreateFilter work item:
 * the kind of filter, its parameters, and the filter slot/parameter block
 * it is associated with */
typedef struct {
	BeamformerFilterKind       kind;
	BeamformerFilterParameters parameters;
	/* NOTE: stored as single bytes; the asserts below guarantee that the
	 * configured slot counts are representable */
	u8 filter_slot;
	u8 parameter_block;
	static_assert(BeamformerFilterSlots            <= 255, "CreateFilterContext only supports 255 filter slots");
	static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks");
} BeamformerCreateFilterContext;
     36 
/* NOTE: selects what an export work item should export */
typedef enum {
	BeamformerExportKind_BeamformedData,
	BeamformerExportKind_Stats,
} BeamformerExportKind;

/* NOTE: context carried by an export work item: what to export and a size
 * (presumably the size in bytes of the exported data - TODO confirm) */
typedef struct {
	BeamformerExportKind kind;
	u32 size;
} BeamformerExportContext;
     46 
/* NOTE: X-macro list of the named shared memory locks. The enum generated
 * below gives each one an index; BeamformerSharedMemoryLockKind_Count is the
 * number of named locks and is also used in this file as the index of the
 * first per parameter block lock. */
#define BEAMFORMER_SHARED_MEMORY_LOCKS \
	X(ScratchSpace)    \
	X(UploadRF)        \
	X(ExportSync)      \
	X(DispatchCompute)

#define X(name) BeamformerSharedMemoryLockKind_##name,
typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind;
#undef X
     56 
/* NOTE: context for a BeamformerWorkKind_Compute work item: a frame pointer
 * and the index of the parameter block to use */
typedef struct {
	BeamformerFrame *frame;
	u32              parameter_block;
} BeamformerComputeWorkContext;

/* NOTE: context for a BeamformerWorkKind_ComputeIndirect work item: identifies
 * a view plane by tag instead of carrying a frame pointer */
typedef struct {
	BeamformerViewPlaneTag view_plane;
	u32                    parameter_block;
} BeamformerComputeIndirectWorkContext;
     66 
/* NOTE: discriminated union based on type */
/* NOTE: a single entry of a BeamformWorkQueue. `kind` is the discriminator for
 * the anonymous union; `lock` names a shared memory lock associated with the
 * item (how it is used is decided by the code consuming the queue). */
typedef struct {
	BeamformWorkKind kind;
	BeamformerSharedMemoryLockKind lock;
	union {
		void                                 *generic;
		BeamformerComputeWorkContext          compute_context;
		BeamformerComputeIndirectWorkContext  compute_indirect_context;
		BeamformerCreateFilterContext         create_filter_context;
		BeamformerExportContext               export_context;
		ShaderReloadContext                  *shader_reload_context;
	};
} BeamformWork;
     80 
/* NOTE: fixed capacity ring buffer of work items. Both indices live in one
 * 64 bit word so that they can be read and updated with single atomic
 * operations: the queue functions in this file treat the low 32 bits as the
 * write index and the high 32 bits as the read index. The indices are free
 * running and are masked with (capacity - 1) when used, which is why the
 * capacity must be a power of 2. */
typedef struct {
	union {
		u64 queue;
		struct {u32 widx, ridx;};
	};
	/* NOTE: 64 entries */
	BeamformWork work_items[1 << 6];
} BeamformWorkQueue;
     88 
/* NOTE: total size of the shared memory region */
#define BEAMFORMER_SHARED_MEMORY_SIZE             (GB(2))
/* NOTE: what remains of the region after the BeamformerSharedMemory header
 * and a single BeamformerParameterBlock */
#define BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - \
                                                   sizeof(BeamformerSharedMemory) - \
                                                   sizeof(BeamformerParameterBlock))
     93 
/* NOTE: bit flags generated from BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST
 * (defined outside this file); each entry becomes the single bit (1 << id) */
#define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id),
typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags;
#undef X
     97 
/* NOTE: X-macro list of the individually trackable regions of a
 * BeamformerParameterBlock. The first argument names the region, the second
 * is the BeamformerParameterBlock field where the region starts. */
#define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \
	X(ComputePipeline, pipeline)        \
	X(ChannelMapping,  channel_mapping) \
	X(FocalVectors,    focal_vectors)   \
	X(Parameters,      parameters)      \
	X(SparseElements,  sparse_elements)

/* NOTE: region indices; a region's value is also its bit position in
 * BeamformerParameterBlock.dirty_regions */
typedef enum {
	#define X(k, ...) BeamformerParameterBlockRegion_##k,
	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
	#undef X
	BeamformerParameterBlockRegion_Count
} BeamformerParameterBlockRegions;
    111 
/* NOTE: per shader stage parameters; currently only a filter slot index */
typedef union {
	u8 filter_slot;
} BeamformerShaderParameters;

/* NOTE: description of a compute pipeline as a set of parallel arrays with one
 * entry per shader stage. Presumably only the first `shader_count` entries of
 * each array are meaningful - TODO confirm against the code that fills this in. */
typedef struct {
	BeamformerShaderKind       shaders[BeamformerMaxComputeShaderStages];
	BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages];
	u32                        program_indices[BeamformerMaxComputeShaderStages];
	u32                        shader_count;
	BeamformerDataKind         data_kind;
} BeamformerComputePipeline;
    123 
/* NOTE: one complete set of beamforming parameters. Blocks are stored back to
 * back directly after the BeamformerSharedMemory header (see
 * beamformer_parameter_block()), hence the requirement asserted below that the
 * size is a multiple of the alignment. */
typedef struct {
	/* NOTE: the same storage viewed either as a whole or as head + UI parts */
	alignas(16) union {
		BeamformerParameters parameters;
		struct {
			BeamformerParametersHead parameters_head;
			BeamformerUIParameters   parameters_ui;
		};
	};

	/* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */
	/* NOTE: bit N corresponds to the BeamformerParameterBlockRegions value N */
	u32 dirty_regions;
	static_assert(BeamformerParameterBlockRegion_Count <= 32, "only 32 parameter block regions supported");

	BeamformerComputePipeline pipeline;

	alignas(16) i16 channel_mapping[BeamformerMaxChannelCount];
	alignas(16) i16 sparse_elements[BeamformerMaxChannelCount];
	/* NOTE(rnp): interleaved transmit angle, focal depth pairs */
	alignas(16) v2  focal_vectors[BeamformerMaxChannelCount];
} BeamformerParameterBlock;
static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0,
              "sizeof(BeamformerParametersBlock) must be a multiple of its alignment");
    146 
/* NOTE: byte offset of each region's first field within a
 * BeamformerParameterBlock, indexed by BeamformerParameterBlockRegions.
 * NOTE(review): offsets are stored as u16; this assumes every listed field
 * starts within the first 64KB of the block - TODO confirm. */
#define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field),
read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = {
	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
};
#undef X
    152 
/* NOTE: header located at the start of the shared memory region. The
 * parameter blocks follow immediately after this struct (see
 * beamformer_parameter_block()) and the scratch space follows those (see
 * beamformer_shared_memory_scratch_arena()). */
typedef struct {
	u32 version;

	/* NOTE(rnp): causes future library calls to fail.
	 * see note in beamformer_invalidate_shared_memory() */
	b32 invalid;

	/* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of
	 * the lock without leaving userspace. */
	/* NOTE: the first BeamformerSharedMemoryLockKind_Count entries are the named
	 * locks; entry (BeamformerSharedMemoryLockKind_Count + N) belongs to parameter block N */
	i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots];

	/* NOTE(rnp): total number of parameter block regions the client has requested.
	 * used to calculate offset to scratch space and to track number of allocated
	 * semaphores on w32. Defaults to 1 but can be changed at runtime */
	u32 reserved_parameter_blocks;

	/* TODO(rnp): this is really sucky. we need a better way to communicate this */
	u32 scratch_rf_size;

	BeamformerLiveImagingParameters live_imaging_parameters;
	BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags;

	BeamformWorkQueue external_work_queue;
} BeamformerSharedMemory;
    177 
    178 function BeamformWork *
    179 beamform_work_queue_pop(BeamformWorkQueue *q)
    180 {
    181 	BeamformWork *result = 0;
    182 
    183 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    184 	u64 val  = atomic_load_u64(&q->queue);
    185 	u64 mask = countof(q->work_items) - 1;
    186 	u64 widx = val       & mask;
    187 	u64 ridx = val >> 32 & mask;
    188 
    189 	if (ridx != widx)
    190 		result = q->work_items + ridx;
    191 
    192 	return result;
    193 }
    194 
    195 function void
    196 beamform_work_queue_pop_commit(BeamformWorkQueue *q)
    197 {
    198 	atomic_add_u64(&q->queue, 0x100000000ULL);
    199 }
    200 
    201 function BeamformWork *
    202 beamform_work_queue_push(BeamformWorkQueue *q)
    203 {
    204 	BeamformWork *result = 0;
    205 
    206 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    207 	u64 val  = atomic_load_u64(&q->queue);
    208 	u64 mask = countof(q->work_items) - 1;
    209 	u64 widx = val        & mask;
    210 	u64 ridx = val >> 32  & mask;
    211 	u64 next = (widx + 1) & mask;
    212 
    213 	if (val & 0x80000000)
    214 		atomic_and_u64(&q->queue, ~0x80000000);
    215 
    216 	if (next != ridx) {
    217 		result = q->work_items + widx;
    218 		zero_struct(result);
    219 	}
    220 
    221 	return result;
    222 }
    223 
    224 function void
    225 beamform_work_queue_push_commit(BeamformWorkQueue *q)
    226 {
    227 	atomic_add_u64(&q->queue, 1);
    228 }
    229 
    230 function BeamformerParameterBlock *
    231 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block)
    232 {
    233 	assert(sm->reserved_parameter_blocks >= block);
    234 	BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result));
    235 	return result;
    236 }
    237 
    238 function b32
    239 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block)
    240 {
    241 	b32 result = beamformer_parameter_block(sm, block)->dirty_regions != 0;
    242 	return result;
    243 }
    244 
    245 function BeamformerParameterBlock *
    246 beamformer_parameter_block_lock(SharedMemoryRegion *sm, u32 block, i32 timeout_ms)
    247 {
    248 	assert(block < BeamformerMaxParameterBlockSlots);
    249 	BeamformerSharedMemory   *b      = sm->region;
    250 	BeamformerParameterBlock *result = 0;
    251 	if (os_shared_memory_region_lock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block, (u32)timeout_ms))
    252 		result = beamformer_parameter_block(sm->region, block);
    253 	return result;
    254 }
    255 
    256 function void
    257 beamformer_parameter_block_unlock(SharedMemoryRegion *sm, u32 block)
    258 {
    259 	assert(block < BeamformerMaxParameterBlockSlots);
    260 	BeamformerSharedMemory *b = sm->region;
    261 	os_shared_memory_region_unlock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block);
    262 }
    263 
    264 function Arena
    265 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm)
    266 {
    267 	assert(sm->reserved_parameter_blocks > 0);
    268 	BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks);
    269 	Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + BEAMFORMER_SHARED_MEMORY_SIZE};
    270 	result.beg = arena_aligned_start(result, KB(4));
    271 	return result;
    272 }
    273 
    274 function void
    275 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region)
    276 {
    277 	BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block);
    278 	atomic_or_u32(&pb->dirty_regions, 1 << region);
    279 }
    280 
    281 function void
    282 mark_parameter_block_region_clean(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region)
    283 {
    284 	BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block);
    285 	atomic_and_u32(&pb->dirty_regions, ~(1 << region));
    286 }
    287 
/* NOTE: releases `lock` (an index into `locks`) through the OS layer.
 * The check wrapped in DEBUG_DECL is presumably only compiled into debug
 * builds (TODO confirm against the macro definition); when it is present the
 * unlock is skipped if locks[lock] is zero. */
function void
post_sync_barrier(SharedMemoryRegion *sm, BeamformerSharedMemoryLockKind lock, i32 *locks)
{
	/* NOTE(rnp): debug: here it is not a bug to release the lock if it
	 * isn't held but elsewhere it is */
	DEBUG_DECL(if (locks[lock])) {
		os_shared_memory_region_unlock(sm, locks, (i32)lock);
	}
}
    296 }