beamformer_shared_memory.c (9480B)
1 /* See LICENSE for license details. */ 2 #define BEAMFORMER_SHARED_MEMORY_VERSION (14UL) 3 4 typedef struct BeamformerFrame BeamformerFrame; 5 typedef struct ShaderReloadContext ShaderReloadContext; 6 7 typedef enum { 8 BeamformerWorkKind_Compute, 9 BeamformerWorkKind_ComputeIndirect, 10 BeamformerWorkKind_CreateFilter, 11 BeamformerWorkKind_ReloadShader, 12 BeamformerWorkKind_ExportBuffer, 13 BeamformerWorkKind_UploadBuffer, 14 } BeamformerWorkKind; 15 16 /* TODO(rnp): this is massively bloating the queue; think of some other 17 * way to communicate these to the beamformer */ 18 typedef struct { 19 union { 20 #define X(kind, ...) struct {__VA_ARGS__ ;} kind; 21 BEAMFORMER_FILTER_KIND_LIST(f32, ;) 22 #undef X 23 }; 24 f32 sampling_frequency; 25 b16 complex; 26 } BeamformerFilterParameters; 27 28 typedef struct { 29 BeamformerFilterKind kind; 30 BeamformerFilterParameters parameters; 31 u8 filter_slot; 32 u8 parameter_block; 33 static_assert(BeamformerFilterSlots <= 255, "CreateFilterContext only supports 255 filter slots"); 34 static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks"); 35 } BeamformerCreateFilterContext; 36 37 typedef enum { 38 BeamformerExportKind_BeamformedData, 39 BeamformerExportKind_Stats, 40 } BeamformerExportKind; 41 42 typedef struct { 43 BeamformerExportKind kind; 44 u32 size; 45 } BeamformerExportContext; 46 47 #define BEAMFORMER_SHARED_MEMORY_LOCKS \ 48 X(ScratchSpace) \ 49 X(UploadRF) \ 50 X(ExportSync) \ 51 X(DispatchCompute) 52 53 #define X(name) BeamformerSharedMemoryLockKind_##name, 54 typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind; 55 #undef X 56 57 typedef struct { 58 BeamformerFrame *frame; 59 u32 parameter_block; 60 } BeamformerComputeWorkContext; 61 62 typedef struct { 63 BeamformerViewPlaneTag view_plane; 64 u32 parameter_block; 65 } BeamformerComputeIndirectWorkContext; 66 67 /* NOTE: discriminated union based on type */ 68 typedef struct { 69 BeamformerWorkKind kind; 70 BeamformerSharedMemoryLockKind lock; 71 union { 72 void *generic; 73 BeamformerComputeWorkContext compute_context; 74 BeamformerComputeIndirectWorkContext compute_indirect_context; 75 BeamformerCreateFilterContext create_filter_context; 76 BeamformerExportContext export_context; 77 ShaderReloadContext *shader_reload_context; 78 }; 79 } BeamformWork; 80 81 typedef struct { 82 union { 83 u64 queue; 84 struct {u32 widx, ridx;}; 85 }; 86 BeamformWork work_items[1 << 6]; 87 } BeamformWorkQueue; 88 89 #define BEAMFORMER_SHARED_MEMORY_SIZE (GB(2)) 90 #define BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - \ 91 sizeof(BeamformerSharedMemory) - \ 92 sizeof(BeamformerParameterBlock)) 93 94 #define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id), 95 typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags; 96 #undef X 97 98 #define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \ 99 X(ComputePipeline, pipeline) \ 100 X(ChannelMapping, channel_mapping) \ 101 X(FocalVectors, focal_vectors) \ 102 X(Parameters, parameters) \ 103 X(SparseElements, sparse_elements) 104 105 typedef enum { 106 #define X(k, ...) BeamformerParameterBlockRegion_##k, 107 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 108 #undef X 109 BeamformerParameterBlockRegion_Count 110 } BeamformerParameterBlockRegions; 111 112 typedef union { 113 u8 filter_slot; 114 } BeamformerShaderParameters; 115 116 typedef struct { 117 BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages]; 118 BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages]; 119 u32 program_indices[BeamformerMaxComputeShaderStages]; 120 u32 shader_count; 121 BeamformerDataKind data_kind; 122 } BeamformerComputePipeline; 123 124 typedef struct { 125 alignas(16) union { 126 BeamformerParameters parameters; 127 struct { 128 BeamformerParametersHead parameters_head; 129 BeamformerUIParameters parameters_ui; 130 }; 131 }; 132 133 /* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */ 134 u32 dirty_regions; 135 static_assert(BeamformerParameterBlockRegion_Count <= 32, "only 32 parameter block regions supported"); 136 137 BeamformerComputePipeline pipeline; 138 139 alignas(16) i16 channel_mapping[BeamformerMaxChannelCount]; 140 alignas(16) i16 sparse_elements[BeamformerMaxChannelCount]; 141 /* NOTE(rnp): interleaved transmit angle, focal depth pairs */ 142 alignas(16) v2 focal_vectors[BeamformerMaxChannelCount]; 143 } BeamformerParameterBlock; 144 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0, 145 "sizeof(BeamformerParametersBlock) must be a multiple of its alignment"); 146 147 #define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field), 148 read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = { 149 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 150 }; 151 #undef X 152 153 typedef struct { 154 u32 version; 155 156 /* NOTE(rnp): causes future library calls to fail. 157 * see note in beamformer_invalidate_shared_memory() */ 158 b32 invalid; 159 160 /* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of 161 * the lock without leaving userspace. */ 162 i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots]; 163 164 /* NOTE(rnp): total number of parameter block regions the client has requested. 165 * used to calculate offset to scratch space and to track number of allocated 166 * semaphores on w32. Defaults to 1 but can be changed at runtime */ 167 u32 reserved_parameter_blocks; 168 169 /* TODO(rnp): this is really sucky. we need a better way to communicate this */ 170 u32 scratch_rf_size; 171 172 BeamformerLiveImagingParameters live_imaging_parameters; 173 BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags; 174 175 BeamformWorkQueue external_work_queue; 176 } BeamformerSharedMemory; 177 178 function BeamformWork * 179 beamform_work_queue_pop(BeamformWorkQueue *q) 180 { 181 BeamformWork *result = 0; 182 183 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 184 u64 val = atomic_load_u64(&q->queue); 185 u64 mask = countof(q->work_items) - 1; 186 u64 widx = val & mask; 187 u64 ridx = val >> 32 & mask; 188 189 if (ridx != widx) 190 result = q->work_items + ridx; 191 192 return result; 193 } 194 195 function void 196 beamform_work_queue_pop_commit(BeamformWorkQueue *q) 197 { 198 atomic_add_u64(&q->queue, 0x100000000ULL); 199 } 200 201 function BeamformWork * 202 beamform_work_queue_push(BeamformWorkQueue *q) 203 { 204 BeamformWork *result = 0; 205 206 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 207 u64 val = atomic_load_u64(&q->queue); 208 u64 mask = countof(q->work_items) - 1; 209 u64 widx = val & mask; 210 u64 ridx = val >> 32 & mask; 211 u64 next = (widx + 1) & mask; 212 213 if (val & 0x80000000) 214 atomic_and_u64(&q->queue, ~0x80000000); 215 216 if (next != ridx) { 217 result = q->work_items + widx; 218 zero_struct(result); 219 } 220 221 return result; 222 } 223 224 function void 225 beamform_work_queue_push_commit(BeamformWorkQueue *q) 226 { 227 atomic_add_u64(&q->queue, 1); 228 } 229 230 function BeamformerParameterBlock * 231 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block) 232 { 233 assert(sm->reserved_parameter_blocks >= block); 234 BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result)); 235 return result; 236 } 237 238 function b32 239 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block) 240 { 241 b32 result = beamformer_parameter_block(sm, block)->dirty_regions != 0; 242 return result; 243 } 244 245 function BeamformerParameterBlock * 246 beamformer_parameter_block_lock(SharedMemoryRegion *sm, u32 block, i32 timeout_ms) 247 { 248 assert(block < BeamformerMaxParameterBlockSlots); 249 BeamformerSharedMemory *b = sm->region; 250 BeamformerParameterBlock *result = 0; 251 if (os_shared_memory_region_lock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block, (u32)timeout_ms)) 252 result = beamformer_parameter_block(sm->region, block); 253 return result; 254 } 255 256 function void 257 beamformer_parameter_block_unlock(SharedMemoryRegion *sm, u32 block) 258 { 259 assert(block < BeamformerMaxParameterBlockSlots); 260 BeamformerSharedMemory *b = sm->region; 261 os_shared_memory_region_unlock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block); 262 } 263 264 function Arena 265 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm) 266 { 267 assert(sm->reserved_parameter_blocks > 0); 268 BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks); 269 Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + BEAMFORMER_SHARED_MEMORY_SIZE}; 270 result.beg = arena_aligned_start(result, KB(4)); 271 return result; 272 } 273 274 function void 275 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region) 276 { 277 BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block); 278 atomic_or_u32(&pb->dirty_regions, 1 << region); 279 } 280 281 function void 282 mark_parameter_block_region_clean(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region) 283 { 284 BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block); 285 atomic_and_u32(&pb->dirty_regions, ~(1 << region)); 286 } 287 288 function void 289 post_sync_barrier(SharedMemoryRegion *sm, BeamformerSharedMemoryLockKind lock, i32 *locks) 290 { 291 /* NOTE(rnp): debug: here it is not a bug to release the lock if it 292 * isn't held but elswhere it is */ 293 DEBUG_DECL(if (locks[lock])) { 294 os_shared_memory_region_unlock(sm, locks, (i32)lock); 295 } 296 }