beamformer_shared_memory.c (9711B)
1 /* See LICENSE for license details. */ 2 #define BEAMFORMER_SHARED_MEMORY_VERSION (23UL) 3 4 typedef struct BeamformerFrame BeamformerFrame; 5 6 typedef enum { 7 BeamformerWorkKind_Compute, 8 BeamformerWorkKind_ComputeIndirect, 9 BeamformerWorkKind_CreateFilter, 10 BeamformerWorkKind_ExportBuffer, 11 BeamformerWorkKind_UploadBuffer, 12 } BeamformerWorkKind; 13 14 typedef struct { 15 BeamformerFilterParameters parameters; 16 u8 filter_slot; 17 u8 parameter_block; 18 static_assert(BeamformerFilterSlots <= 255, "CreateFilterContext only supports 255 filter slots"); 19 static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks"); 20 } BeamformerCreateFilterContext; 21 22 typedef enum { 23 BeamformerExportKind_BeamformedData, 24 BeamformerExportKind_Stats, 25 } BeamformerExportKind; 26 27 typedef struct { 28 BeamformerExportKind kind; 29 u32 size; 30 } BeamformerExportContext; 31 32 #define BEAMFORMER_SHARED_MEMORY_LOCKS \ 33 X(ScratchSpace) \ 34 X(UploadRF) \ 35 X(ExportSync) \ 36 X(DispatchCompute) 37 38 #define X(name) BeamformerSharedMemoryLockKind_##name, 39 typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind; 40 #undef X 41 42 typedef struct { 43 BeamformerFrame *frame; 44 u32 parameter_block; 45 } BeamformerComputeWorkContext; 46 47 typedef struct { 48 BeamformerViewPlaneTag view_plane; 49 u32 parameter_block; 50 } BeamformerComputeIndirectWorkContext; 51 52 /* NOTE: discriminated union based on type */ 53 typedef struct { 54 BeamformerWorkKind kind; 55 BeamformerSharedMemoryLockKind lock; 56 union { 57 void *generic; 58 BeamformerComputeWorkContext compute_context; 59 BeamformerComputeIndirectWorkContext compute_indirect_context; 60 BeamformerCreateFilterContext create_filter_context; 61 BeamformerExportContext export_context; 62 BeamformerShaderKind reload_shader; 63 }; 64 } BeamformWork; 65 66 typedef struct { 67 union { 68 u64 queue; 69 struct {u32 widx, ridx;}; 70 }; 71 BeamformWork work_items[1 << 6]; 72 } BeamformWorkQueue; 73 74 #define BEAMFORMER_SHARED_MEMORY_SIZE (GB(2)) 75 #define BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - \ 76 sizeof(BeamformerSharedMemory) - \ 77 sizeof(BeamformerParameterBlock)) 78 79 #define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id), 80 typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags; 81 #undef X 82 83 #define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \ 84 X(ComputePipeline, pipeline) \ 85 X(ChannelMapping, channel_mapping) \ 86 X(FocalVectors, focal_vectors) \ 87 X(Parameters, parameters) \ 88 X(SparseElements, sparse_elements) \ 89 X(TransmitReceiveOrientations, transmit_receive_orientations) 90 91 typedef enum { 92 #define X(k, ...) BeamformerParameterBlockRegion_##k, 93 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 94 #undef X 95 BeamformerParameterBlockRegion_Count 96 } BeamformerParameterBlockRegions; 97 98 typedef union { 99 u8 filter_slot; 100 } BeamformerShaderParameters; 101 102 typedef struct { 103 BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages]; 104 BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages]; 105 u32 shader_count; 106 BeamformerDataKind data_kind; 107 } BeamformerComputePipeline; 108 109 typedef struct { 110 alignas(16) union { 111 BeamformerParameters parameters; 112 struct { 113 BeamformerParametersHead parameters_head; 114 BeamformerUIParameters parameters_ui; 115 }; 116 }; 117 118 /* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */ 119 u32 dirty_regions; 120 static_assert(BeamformerParameterBlockRegion_Count <= 32, "only 32 parameter block regions supported"); 121 122 BeamformerComputePipeline pipeline; 123 124 alignas(16) i16 channel_mapping[BeamformerMaxChannelCount]; 125 alignas(16) i16 sparse_elements[BeamformerMaxChannelCount]; 126 alignas(16) u8 transmit_receive_orientations[BeamformerMaxChannelCount]; 127 /* NOTE(rnp): interleaved transmit angle, focal depth pairs */ 128 alignas(16) v2 focal_vectors[BeamformerMaxChannelCount]; 129 } BeamformerParameterBlock; 130 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0, 131 "sizeof(BeamformerParametersBlock) must be a multiple of its alignment"); 132 133 #define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field), 134 read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = { 135 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 136 }; 137 #undef X 138 139 typedef struct { 140 u32 version; 141 142 /* NOTE(rnp): causes future library calls to fail. 143 * see note in beamformer_invalidate_shared_memory() */ 144 b32 invalid; 145 146 /* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of 147 * the lock without leaving userspace. */ 148 i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots]; 149 150 /* NOTE(rnp): total number of parameter block regions the client has requested. 151 * used to calculate offset to scratch space and to track number of allocated 152 * semaphores on w32. Defaults to 1 but can be changed at runtime */ 153 u32 reserved_parameter_blocks; 154 155 /* TODO(rnp): this is really sucky. we need a better way to communicate this */ 156 u64 rf_block_rf_size; 157 158 BeamformerLiveImagingParameters live_imaging_parameters; 159 BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags; 160 161 BeamformWorkQueue external_work_queue; 162 } BeamformerSharedMemory; 163 164 function BeamformWork * 165 beamform_work_queue_pop(BeamformWorkQueue *q) 166 { 167 BeamformWork *result = 0; 168 169 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 170 u64 val = atomic_load_u64(&q->queue); 171 u64 mask = countof(q->work_items) - 1; 172 u64 widx = val & mask; 173 u64 ridx = val >> 32 & mask; 174 175 if (ridx != widx) 176 result = q->work_items + ridx; 177 178 return result; 179 } 180 181 function void 182 beamform_work_queue_pop_commit(BeamformWorkQueue *q) 183 { 184 atomic_add_u64(&q->queue, 0x100000000ULL); 185 } 186 187 function BeamformWork * 188 beamform_work_queue_push(BeamformWorkQueue *q) 189 { 190 BeamformWork *result = 0; 191 192 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 193 u64 val = atomic_load_u64(&q->queue); 194 u64 mask = countof(q->work_items) - 1; 195 u64 widx = val & mask; 196 u64 ridx = val >> 32 & mask; 197 u64 next = (widx + 1) & mask; 198 199 if (val & 0x80000000) 200 atomic_and_u64(&q->queue, ~0x80000000); 201 202 if (next != ridx) { 203 result = q->work_items + widx; 204 zero_struct(result); 205 } 206 207 return result; 208 } 209 210 function void 211 beamform_work_queue_push_commit(BeamformWorkQueue *q) 212 { 213 atomic_add_u64(&q->queue, 1); 214 } 215 216 #if OS_WINDOWS 217 // NOTE(rnp): junk needed on w32 to watch a value across processes while yielding 218 // control back to the kernel. There are user level CPU instructions that allow 219 // this so why w32 can't do it in kernel mode sounds like shitty design to me. 220 DEBUG_IMPORT OSW32Semaphore os_w32_shared_memory_semaphores[countof(((BeamformerSharedMemory *)0)->locks)]; 221 #endif 222 223 function b32 224 beamformer_shared_memory_take_lock(BeamformerSharedMemory *sm, i32 lock, u32 timeout_ms) 225 { 226 #if OS_WINDOWS 227 b32 result = os_w32_semaphore_wait(os_w32_shared_memory_semaphores[lock], timeout_ms); 228 if (result) atomic_store_u32(sm->locks + lock, 1); 229 #else 230 b32 result = take_lock(sm->locks + lock, timeout_ms); 231 #endif 232 return result; 233 } 234 235 function void 236 beamformer_shared_memory_release_lock(BeamformerSharedMemory *sm, i32 lock) 237 { 238 release_lock(sm->locks + lock); 239 #if OS_WINDOWS 240 os_w32_semaphore_release(os_w32_shared_memory_semaphores[lock], 1); 241 #endif 242 } 243 244 function BeamformerParameterBlock * 245 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block) 246 { 247 assert(sm->reserved_parameter_blocks >= block); 248 BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result)); 249 return result; 250 } 251 252 function b32 253 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block) 254 { 255 b32 result = beamformer_parameter_block(sm, block)->dirty_regions != 0; 256 return result; 257 } 258 259 function BeamformerParameterBlock * 260 beamformer_parameter_block_lock(BeamformerSharedMemory *sm, u32 block, i32 timeout_ms) 261 { 262 assert(block < BeamformerMaxParameterBlockSlots); 263 BeamformerParameterBlock *result = 0; 264 if (beamformer_shared_memory_take_lock(sm, BeamformerSharedMemoryLockKind_Count + block, (u32)timeout_ms)) 265 result = beamformer_parameter_block(sm, block); 266 return result; 267 } 268 269 function void 270 beamformer_parameter_block_unlock(BeamformerSharedMemory *sm, u32 block) 271 { 272 assert(block < BeamformerMaxParameterBlockSlots); 273 beamformer_shared_memory_release_lock(sm, BeamformerSharedMemoryLockKind_Count + block); 274 } 275 276 function Arena 277 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm) 278 { 279 assert(sm->reserved_parameter_blocks > 0); 280 BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks); 281 Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + BEAMFORMER_SHARED_MEMORY_SIZE}; 282 result.beg = arena_aligned_start(result, KB(4)); 283 return result; 284 } 285 286 function void 287 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region) 288 { 289 BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block); 290 atomic_or_u32(&pb->dirty_regions, 1u << region); 291 } 292 293 function void 294 post_sync_barrier(BeamformerSharedMemory *sm, BeamformerSharedMemoryLockKind lock) 295 { 296 /* NOTE(rnp): debug: here it is not a bug to release the lock if it 297 * isn't held but elswhere it is */ 298 DEBUG_DECL(if (sm->locks[lock])) { 299 beamformer_shared_memory_release_lock(sm, lock); 300 } 301 }