beamformer_shared_memory.c (9711B)
1 /* See LICENSE for license details. */ 2 #define BEAMFORMER_SHARED_MEMORY_VERSION (26UL) 3 4 typedef struct BeamformerFrame BeamformerFrame; 5 6 typedef enum { 7 BeamformerWorkKind_Compute, 8 BeamformerWorkKind_ComputeIndirect, 9 BeamformerWorkKind_CreateFilter, 10 BeamformerWorkKind_ExportBuffer, 11 } BeamformerWorkKind; 12 13 typedef struct { 14 BeamformerFilterParameters parameters; 15 u8 filter_slot; 16 u8 parameter_block; 17 static_assert(BeamformerFilterSlots <= 255, "CreateFilterContext only supports 255 filter slots"); 18 static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks"); 19 } BeamformerCreateFilterContext; 20 21 typedef enum { 22 BeamformerExportKind_BeamformedData, 23 BeamformerExportKind_Stats, 24 } BeamformerExportKind; 25 26 typedef struct { 27 BeamformerExportKind kind; 28 u32 size; 29 } BeamformerExportContext; 30 31 #define BEAMFORMER_SHARED_MEMORY_LOCKS \ 32 X(ScratchSpace) \ 33 X(UploadRF) \ 34 X(ExportSync) \ 35 X(DispatchCompute) 36 37 #define X(name) BeamformerSharedMemoryLockKind_##name, 38 typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind; 39 #undef X 40 41 typedef struct { 42 BeamformerFrame *frame; 43 u32 parameter_block; 44 } BeamformerComputeWorkContext; 45 46 typedef struct { 47 BeamformerViewPlaneTag view_plane; 48 u32 parameter_block; 49 } BeamformerComputeIndirectWorkContext; 50 51 /* NOTE: discriminated union based on type */ 52 typedef struct { 53 BeamformerWorkKind kind; 54 BeamformerSharedMemoryLockKind lock; 55 union { 56 void *generic; 57 BeamformerComputeWorkContext compute_context; 58 BeamformerComputeIndirectWorkContext compute_indirect_context; 59 BeamformerCreateFilterContext create_filter_context; 60 BeamformerExportContext export_context; 61 BeamformerShaderKind reload_shader; 62 }; 63 } BeamformWork; 64 65 typedef struct { 66 union { 67 u64 queue; 68 struct {u32 widx, ridx;}; 69 }; 70 BeamformWork work_items[1 << 6]; 71 } BeamformWorkQueue; 72 73 #define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id), 74 typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags; 75 #undef X 76 77 #define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \ 78 X(ComputePipeline, pipeline) \ 79 X(ChannelMapping, channel_mapping) \ 80 X(FocalVectors, focal_vectors) \ 81 X(Parameters, parameters) \ 82 X(SparseElements, sparse_elements) \ 83 X(TransmitReceiveOrientations, transmit_receive_orientations) \ 84 85 #define BEAMFORMER_PARAMETER_BLOCK_REGION_FLAG_LIST \ 86 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \ 87 X(NotifyUI) \ 88 89 typedef enum { 90 #define X(k, ...) BeamformerParameterBlockRegion_##k, 91 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 92 #undef X 93 BeamformerParameterBlockRegion_Count 94 } BeamformerParameterBlockRegions; 95 96 typedef enum { 97 #define X(k, ...) BeamformerParameterRegionFlag_##k, 98 BEAMFORMER_PARAMETER_BLOCK_REGION_FLAG_LIST 99 #undef X 100 BeamformerParameterRegionFlag_Count, 101 } BeamformerParameterRegionFlags; 102 103 typedef union { 104 u8 filter_slot; 105 } BeamformerShaderParameters; 106 107 typedef struct { 108 BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages]; 109 BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages]; 110 u32 shader_count; 111 BeamformerDataKind data_kind; 112 } BeamformerComputePipeline; 113 114 typedef struct { 115 alignas(16) union { 116 BeamformerParameters parameters; 117 struct { 118 BeamformerParametersHead parameters_head; 119 BeamformerUIParameters parameters_ui; 120 BeamformerParametersExtra parameters_extra; 121 }; 122 }; 123 124 /* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */ 125 u32 region_update_flags; 126 static_assert(BeamformerParameterRegionFlag_Count <= 32, ""); 127 128 BeamformerComputePipeline pipeline; 129 130 alignas(16) i16 channel_mapping[BeamformerMaxChannelCount]; 131 alignas(16) i16 sparse_elements[BeamformerMaxChannelCount]; 132 alignas(16) u8 transmit_receive_orientations[BeamformerMaxChannelCount]; 133 /* NOTE(rnp): interleaved transmit angle, focal depth pairs */ 134 alignas(16) v2 focal_vectors[BeamformerMaxChannelCount]; 135 } BeamformerParameterBlock; 136 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0, 137 "sizeof(BeamformerParametersBlock) must be a multiple of its alignment"); 138 139 #define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field), 140 read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = { 141 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 142 }; 143 #undef X 144 145 typedef struct { 146 u32 version; 147 148 /* NOTE(rnp): causes future library calls to fail. 149 * see note in beamformer_invalidate_shared_memory() */ 150 b32 invalid; 151 152 /* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of 153 * the lock without leaving userspace. */ 154 i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots]; 155 156 /* NOTE(rnp): total number of parameter block regions the client has requested. 157 * used to calculate offset to scratch space and to track number of allocated 158 * semaphores on w32. Defaults to 1 but can be changed at runtime */ 159 u32 reserved_parameter_blocks; 160 161 /* TODO(rnp): this is really sucky. we need a better way to communicate this */ 162 u64 rf_block_rf_size; 163 164 BeamformerLiveImagingParameters live_imaging_parameters; 165 BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags; 166 167 BeamformWorkQueue external_work_queue; 168 } BeamformerSharedMemory; 169 170 function BeamformWork * 171 beamform_work_queue_pop(BeamformWorkQueue *q) 172 { 173 BeamformWork *result = 0; 174 175 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 176 u64 val = atomic_load_u64(&q->queue); 177 u64 mask = countof(q->work_items) - 1; 178 u64 widx = val & mask; 179 u64 ridx = val >> 32 & mask; 180 181 if (ridx != widx) 182 result = q->work_items + ridx; 183 184 return result; 185 } 186 187 function void 188 beamform_work_queue_pop_commit(BeamformWorkQueue *q) 189 { 190 atomic_add_u64(&q->queue, 0x100000000ULL); 191 } 192 193 function BeamformWork * 194 beamform_work_queue_push(BeamformWorkQueue *q) 195 { 196 BeamformWork *result = 0; 197 198 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 199 u64 val = atomic_load_u64(&q->queue); 200 u64 mask = countof(q->work_items) - 1; 201 u64 widx = val & mask; 202 u64 ridx = val >> 32 & mask; 203 u64 next = (widx + 1) & mask; 204 205 if (val & 0x80000000) 206 atomic_and_u64(&q->queue, ~0x80000000); 207 208 if (next != ridx) { 209 result = q->work_items + widx; 210 zero_struct(result); 211 } 212 213 return result; 214 } 215 216 function void 217 beamform_work_queue_push_commit(BeamformWorkQueue *q) 218 { 219 atomic_add_u64(&q->queue, 1); 220 } 221 222 #if OS_WINDOWS 223 // NOTE(rnp): junk needed on w32 to watch a value across processes while yielding 224 // control back to the kernel. There are user level CPU instructions that allow 225 // this so why w32 can't do it in kernel mode sounds like shitty design to me. 226 DEBUG_IMPORT OSW32Semaphore os_w32_shared_memory_semaphores[countof(((BeamformerSharedMemory *)0)->locks)]; 227 #endif 228 229 function b32 230 beamformer_shared_memory_take_lock(BeamformerSharedMemory *sm, i32 lock, u32 timeout_ms) 231 { 232 #if OS_WINDOWS 233 b32 result = os_w32_semaphore_wait(os_w32_shared_memory_semaphores[lock], timeout_ms); 234 if (result) atomic_store_u32(sm->locks + lock, 1); 235 #else 236 b32 result = take_lock(sm->locks + lock, timeout_ms); 237 #endif 238 return result; 239 } 240 241 function void 242 beamformer_shared_memory_release_lock(BeamformerSharedMemory *sm, i32 lock) 243 { 244 release_lock(sm->locks + lock); 245 #if OS_WINDOWS 246 os_w32_semaphore_release(os_w32_shared_memory_semaphores[lock], 1); 247 #endif 248 } 249 250 function BeamformerParameterBlock * 251 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block) 252 { 253 assert(sm->reserved_parameter_blocks >= block); 254 BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result)); 255 return result; 256 } 257 258 function b32 259 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block) 260 { 261 b32 result = beamformer_parameter_block(sm, block)->region_update_flags != 0; 262 return result; 263 } 264 265 function BeamformerParameterBlock * 266 beamformer_parameter_block_lock(BeamformerSharedMemory *sm, u32 block, i32 timeout_ms) 267 { 268 assert(block < BeamformerMaxParameterBlockSlots); 269 BeamformerParameterBlock *result = 0; 270 if (beamformer_shared_memory_take_lock(sm, BeamformerSharedMemoryLockKind_Count + block, (u32)timeout_ms)) 271 result = beamformer_parameter_block(sm, block); 272 return result; 273 } 274 275 function void 276 beamformer_parameter_block_unlock(BeamformerSharedMemory *sm, u32 block) 277 { 278 assert(block < BeamformerMaxParameterBlockSlots); 279 beamformer_shared_memory_release_lock(sm, BeamformerSharedMemoryLockKind_Count + block); 280 } 281 282 function Arena 283 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm, i64 shared_memory_size) 284 { 285 assert(sm->reserved_parameter_blocks > 0); 286 BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks); 287 Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + shared_memory_size}; 288 result.beg = arena_aligned_start(result, KB(4)); 289 return result; 290 } 291 292 function void 293 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region) 294 { 295 BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block); 296 atomic_or_u32(&pb->region_update_flags, 1u << region); 297 } 298 299 function void 300 post_sync_barrier(BeamformerSharedMemory *sm, BeamformerSharedMemoryLockKind lock) 301 { 302 /* NOTE(rnp): debug: here it is not a bug to release the lock if it 303 * isn't held but elswhere it is */ 304 DEBUG_DECL(if (sm->locks[lock])) { 305 beamformer_shared_memory_release_lock(sm, lock); 306 } 307 }