beamformer_shared_memory.c (9391B)
1 /* See LICENSE for license details. */ 2 #define BEAMFORMER_SHARED_MEMORY_VERSION (14UL) 3 4 typedef struct BeamformerFrame BeamformerFrame; 5 typedef struct ShaderReloadContext ShaderReloadContext; 6 7 typedef enum { 8 BeamformerWorkKind_Compute, 9 BeamformerWorkKind_ComputeIndirect, 10 BeamformerWorkKind_CreateFilter, 11 BeamformerWorkKind_ReloadShader, 12 BeamformerWorkKind_ExportBuffer, 13 BeamformerWorkKind_UploadBuffer, 14 } BeamformerWorkKind; 15 16 /* TODO(rnp): this is massively bloating the queue; think of some other 17 * way to communicate these to the beamformer */ 18 typedef struct { 19 union { 20 #define X(kind, ...) struct {__VA_ARGS__ ;} kind; 21 BEAMFORMER_FILTER_KIND_LIST(f32, ;) 22 #undef X 23 }; 24 f32 sampling_frequency; 25 b16 complex; 26 } BeamformerFilterParameters; 27 28 typedef struct { 29 BeamformerFilterKind kind; 30 BeamformerFilterParameters parameters; 31 u8 filter_slot; 32 u8 parameter_block; 33 static_assert(BeamformerFilterSlots <= 255, "CreateFilterContext only supports 255 filter slots"); 34 static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks"); 35 } BeamformerCreateFilterContext; 36 37 typedef enum { 38 BeamformerExportKind_BeamformedData, 39 BeamformerExportKind_Stats, 40 } BeamformerExportKind; 41 42 typedef struct { 43 BeamformerExportKind kind; 44 u32 size; 45 } BeamformerExportContext; 46 47 #define BEAMFORMER_SHARED_MEMORY_LOCKS \ 48 X(ScratchSpace) \ 49 X(UploadRF) \ 50 X(ExportSync) \ 51 X(DispatchCompute) 52 53 #define X(name) BeamformerSharedMemoryLockKind_##name, 54 typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind; 55 #undef X 56 57 typedef struct { 58 BeamformerFrame *frame; 59 u32 parameter_block; 60 } BeamformerComputeWorkContext; 61 62 typedef struct { 63 BeamformerViewPlaneTag view_plane; 64 u32 parameter_block; 65 } BeamformerComputeIndirectWorkContext; 66 67 /* NOTE: discriminated union based on type */ 68 typedef struct { 69 BeamformerWorkKind kind; 70 BeamformerSharedMemoryLockKind lock; 71 union { 72 void *generic; 73 BeamformerComputeWorkContext compute_context; 74 BeamformerComputeIndirectWorkContext compute_indirect_context; 75 BeamformerCreateFilterContext create_filter_context; 76 BeamformerExportContext export_context; 77 ShaderReloadContext *shader_reload_context; 78 }; 79 } BeamformWork; 80 81 typedef struct { 82 union { 83 u64 queue; 84 struct {u32 widx, ridx;}; 85 }; 86 BeamformWork work_items[1 << 6]; 87 } BeamformWorkQueue; 88 89 #define BEAMFORMER_SHARED_MEMORY_SIZE (GB(2)) 90 #define BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - \ 91 sizeof(BeamformerSharedMemory) - \ 92 sizeof(BeamformerParameterBlock)) 93 94 #define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id), 95 typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags; 96 #undef X 97 98 #define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \ 99 X(ComputePipeline, pipeline) \ 100 X(ChannelMapping, channel_mapping) \ 101 X(FocalVectors, focal_vectors) \ 102 X(Parameters, parameters) \ 103 X(SparseElements, sparse_elements) 104 105 typedef enum { 106 #define X(k, ...) BeamformerParameterBlockRegion_##k, 107 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 108 #undef X 109 BeamformerParameterBlockRegion_Count 110 } BeamformerParameterBlockRegions; 111 112 typedef union { 113 u8 filter_slot; 114 } BeamformerShaderParameters; 115 116 typedef struct { 117 BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages]; 118 BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages]; 119 u32 shader_count; 120 BeamformerDataKind data_kind; 121 } BeamformerComputePipeline; 122 123 typedef struct { 124 alignas(16) union { 125 BeamformerParameters parameters; 126 struct { 127 BeamformerParametersHead parameters_head; 128 BeamformerUIParameters parameters_ui; 129 }; 130 }; 131 132 /* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */ 133 u32 dirty_regions; 134 static_assert(BeamformerParameterBlockRegion_Count <= 32, "only 32 parameter block regions supported"); 135 136 BeamformerComputePipeline pipeline; 137 138 alignas(16) i16 channel_mapping[BeamformerMaxChannelCount]; 139 alignas(16) i16 sparse_elements[BeamformerMaxChannelCount]; 140 /* NOTE(rnp): interleaved transmit angle, focal depth pairs */ 141 alignas(16) v2 focal_vectors[BeamformerMaxChannelCount]; 142 } BeamformerParameterBlock; 143 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0, 144 "sizeof(BeamformerParametersBlock) must be a multiple of its alignment"); 145 146 #define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field), 147 read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = { 148 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 149 }; 150 #undef X 151 152 typedef struct { 153 u32 version; 154 155 /* NOTE(rnp): causes future library calls to fail. 156 * see note in beamformer_invalidate_shared_memory() */ 157 b32 invalid; 158 159 /* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of 160 * the lock without leaving userspace. */ 161 i32 locks[BeamformerSharedMemoryLockKind_Count + BeamformerMaxParameterBlockSlots]; 162 163 /* NOTE(rnp): total number of parameter block regions the client has requested. 164 * used to calculate offset to scratch space and to track number of allocated 165 * semaphores on w32. Defaults to 1 but can be changed at runtime */ 166 u32 reserved_parameter_blocks; 167 168 /* TODO(rnp): this is really sucky. we need a better way to communicate this */ 169 u32 scratch_rf_size; 170 171 BeamformerLiveImagingParameters live_imaging_parameters; 172 BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags; 173 174 BeamformWorkQueue external_work_queue; 175 } BeamformerSharedMemory; 176 177 function BeamformWork * 178 beamform_work_queue_pop(BeamformWorkQueue *q) 179 { 180 BeamformWork *result = 0; 181 182 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 183 u64 val = atomic_load_u64(&q->queue); 184 u64 mask = countof(q->work_items) - 1; 185 u64 widx = val & mask; 186 u64 ridx = val >> 32 & mask; 187 188 if (ridx != widx) 189 result = q->work_items + ridx; 190 191 return result; 192 } 193 194 function void 195 beamform_work_queue_pop_commit(BeamformWorkQueue *q) 196 { 197 atomic_add_u64(&q->queue, 0x100000000ULL); 198 } 199 200 function BeamformWork * 201 beamform_work_queue_push(BeamformWorkQueue *q) 202 { 203 BeamformWork *result = 0; 204 205 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 206 u64 val = atomic_load_u64(&q->queue); 207 u64 mask = countof(q->work_items) - 1; 208 u64 widx = val & mask; 209 u64 ridx = val >> 32 & mask; 210 u64 next = (widx + 1) & mask; 211 212 if (val & 0x80000000) 213 atomic_and_u64(&q->queue, ~0x80000000); 214 215 if (next != ridx) { 216 result = q->work_items + widx; 217 zero_struct(result); 218 } 219 220 return result; 221 } 222 223 function void 224 beamform_work_queue_push_commit(BeamformWorkQueue *q) 225 { 226 atomic_add_u64(&q->queue, 1); 227 } 228 229 function BeamformerParameterBlock * 230 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block) 231 { 232 assert(sm->reserved_parameter_blocks >= block); 233 BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result)); 234 return result; 235 } 236 237 function b32 238 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block) 239 { 240 b32 result = beamformer_parameter_block(sm, block)->dirty_regions != 0; 241 return result; 242 } 243 244 function BeamformerParameterBlock * 245 beamformer_parameter_block_lock(SharedMemoryRegion *sm, u32 block, i32 timeout_ms) 246 { 247 assert(block < BeamformerMaxParameterBlockSlots); 248 BeamformerSharedMemory *b = sm->region; 249 BeamformerParameterBlock *result = 0; 250 if (os_shared_memory_region_lock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block, (u32)timeout_ms)) 251 result = beamformer_parameter_block(sm->region, block); 252 return result; 253 } 254 255 function void 256 beamformer_parameter_block_unlock(SharedMemoryRegion *sm, u32 block) 257 { 258 assert(block < BeamformerMaxParameterBlockSlots); 259 BeamformerSharedMemory *b = sm->region; 260 os_shared_memory_region_unlock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block); 261 } 262 263 function Arena 264 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm) 265 { 266 assert(sm->reserved_parameter_blocks > 0); 267 BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks); 268 Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + BEAMFORMER_SHARED_MEMORY_SIZE}; 269 result.beg = arena_aligned_start(result, KB(4)); 270 return result; 271 } 272 273 function void 274 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region) 275 { 276 BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block); 277 atomic_or_u32(&pb->dirty_regions, 1 << region); 278 } 279 280 function void 281 mark_parameter_block_region_clean(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region) 282 { 283 BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block); 284 atomic_and_u32(&pb->dirty_regions, ~(1 << region)); 285 } 286 287 function void 288 post_sync_barrier(SharedMemoryRegion *sm, BeamformerSharedMemoryLockKind lock, i32 *locks) 289 { 290 /* NOTE(rnp): debug: here it is not a bug to release the lock if it 291 * isn't held but elswhere it is */ 292 DEBUG_DECL(if (locks[lock])) { 293 os_shared_memory_region_unlock(sm, locks, (i32)lock); 294 } 295 }