beamformer_shared_memory.c (9409B)
1 /* See LICENSE for license details. */ 2 #define BEAMFORMER_SHARED_MEMORY_VERSION (24UL) 3 4 typedef struct BeamformerFrame BeamformerFrame; 5 6 typedef enum { 7 BeamformerWorkKind_Compute, 8 BeamformerWorkKind_ComputeIndirect, 9 BeamformerWorkKind_CreateFilter, 10 BeamformerWorkKind_ExportBuffer, 11 BeamformerWorkKind_UploadBuffer, 12 } BeamformerWorkKind; 13 14 typedef struct { 15 BeamformerFilterParameters parameters; 16 u8 filter_slot; 17 u8 parameter_block; 18 static_assert(BeamformerFilterSlots <= 255, "CreateFilterContext only supports 255 filter slots"); 19 static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks"); 20 } BeamformerCreateFilterContext; 21 22 typedef enum { 23 BeamformerExportKind_BeamformedData, 24 BeamformerExportKind_Stats, 25 } BeamformerExportKind; 26 27 typedef struct { 28 BeamformerExportKind kind; 29 u32 size; 30 } BeamformerExportContext; 31 32 #define BEAMFORMER_SHARED_MEMORY_LOCKS \ 33 X(ScratchSpace) \ 34 X(UploadRF) \ 35 X(ExportSync) \ 36 X(DispatchCompute) 37 38 #define X(name) BeamformerSharedMemoryLockKind_##name, 39 typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind; 40 #undef X 41 42 typedef struct { 43 BeamformerFrame *frame; 44 u32 parameter_block; 45 } BeamformerComputeWorkContext; 46 47 typedef struct { 48 BeamformerViewPlaneTag view_plane; 49 u32 parameter_block; 50 } BeamformerComputeIndirectWorkContext; 51 52 /* NOTE: discriminated union based on type */ 53 typedef struct { 54 BeamformerWorkKind kind; 55 BeamformerSharedMemoryLockKind lock; 56 union { 57 void *generic; 58 BeamformerComputeWorkContext compute_context; 59 BeamformerComputeIndirectWorkContext compute_indirect_context; 60 BeamformerCreateFilterContext create_filter_context; 61 BeamformerExportContext export_context; 62 BeamformerShaderKind reload_shader; 63 }; 64 } BeamformWork; 65 66 typedef struct { 67 union { 68 u64 queue; 69 struct 
{u32 widx, ridx;}; 70 }; 71 BeamformWork work_items[1 << 6]; 72 } BeamformWorkQueue; 73 74 #define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id), 75 typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags; 76 #undef X 77 78 #define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \ 79 X(ComputePipeline, pipeline) \ 80 X(ChannelMapping, channel_mapping) \ 81 X(FocalVectors, focal_vectors) \ 82 X(Parameters, parameters) \ 83 X(SparseElements, sparse_elements) \ 84 X(TransmitReceiveOrientations, transmit_receive_orientations) 85 86 typedef enum { 87 #define X(k, ...) BeamformerParameterBlockRegion_##k, 88 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 89 #undef X 90 BeamformerParameterBlockRegion_Count 91 } BeamformerParameterBlockRegions; 92 93 typedef union { 94 u8 filter_slot; 95 } BeamformerShaderParameters; 96 97 typedef struct { 98 BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages]; 99 BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages]; 100 u32 shader_count; 101 BeamformerDataKind data_kind; 102 } BeamformerComputePipeline; 103 104 typedef struct { 105 alignas(16) union { 106 BeamformerParameters parameters; 107 struct { 108 BeamformerParametersHead parameters_head; 109 BeamformerUIParameters parameters_ui; 110 }; 111 }; 112 113 /* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */ 114 u32 dirty_regions; 115 static_assert(BeamformerParameterBlockRegion_Count <= 32, "only 32 parameter block regions supported"); 116 117 BeamformerComputePipeline pipeline; 118 119 alignas(16) i16 channel_mapping[BeamformerMaxChannelCount]; 120 alignas(16) i16 sparse_elements[BeamformerMaxChannelCount]; 121 alignas(16) u8 transmit_receive_orientations[BeamformerMaxChannelCount]; 122 /* NOTE(rnp): interleaved transmit angle, focal depth pairs */ 123 alignas(16) v2 focal_vectors[BeamformerMaxChannelCount]; 124 } BeamformerParameterBlock; 125 static_assert(sizeof(BeamformerParameterBlock) % 
alignof(BeamformerParameterBlock) == 0, 126 "sizeof(BeamformerParametersBlock) must be a multiple of its alignment"); 127 128 #define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field), 129 read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = { 130 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 131 }; 132 #undef X 133 134 typedef struct { 135 u32 version; 136 137 /* NOTE(rnp): causes future library calls to fail. 138 * see note in beamformer_invalidate_shared_memory() */ 139 b32 invalid; 140 141 /* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of 142 * the lock without leaving userspace. */ 143 i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots]; 144 145 /* NOTE(rnp): total number of parameter block regions the client has requested. 146 * used to calculate offset to scratch space and to track number of allocated 147 * semaphores on w32. Defaults to 1 but can be changed at runtime */ 148 u32 reserved_parameter_blocks; 149 150 /* TODO(rnp): this is really sucky. 
we need a better way to communicate this */ 151 u64 rf_block_rf_size; 152 153 BeamformerLiveImagingParameters live_imaging_parameters; 154 BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags; 155 156 BeamformWorkQueue external_work_queue; 157 } BeamformerSharedMemory; 158 159 function BeamformWork * 160 beamform_work_queue_pop(BeamformWorkQueue *q) 161 { 162 BeamformWork *result = 0; 163 164 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 165 u64 val = atomic_load_u64(&q->queue); 166 u64 mask = countof(q->work_items) - 1; 167 u64 widx = val & mask; 168 u64 ridx = val >> 32 & mask; 169 170 if (ridx != widx) 171 result = q->work_items + ridx; 172 173 return result; 174 } 175 176 function void 177 beamform_work_queue_pop_commit(BeamformWorkQueue *q) 178 { 179 atomic_add_u64(&q->queue, 0x100000000ULL); 180 } 181 182 function BeamformWork * 183 beamform_work_queue_push(BeamformWorkQueue *q) 184 { 185 BeamformWork *result = 0; 186 187 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 188 u64 val = atomic_load_u64(&q->queue); 189 u64 mask = countof(q->work_items) - 1; 190 u64 widx = val & mask; 191 u64 ridx = val >> 32 & mask; 192 u64 next = (widx + 1) & mask; 193 194 if (val & 0x80000000) 195 atomic_and_u64(&q->queue, ~0x80000000); 196 197 if (next != ridx) { 198 result = q->work_items + widx; 199 zero_struct(result); 200 } 201 202 return result; 203 } 204 205 function void 206 beamform_work_queue_push_commit(BeamformWorkQueue *q) 207 { 208 atomic_add_u64(&q->queue, 1); 209 } 210 211 #if OS_WINDOWS 212 // NOTE(rnp): junk needed on w32 to watch a value across processes while yielding 213 // control back to the kernel. There are user level CPU instructions that allow 214 // this so why w32 can't do it in kernel mode sounds like shitty design to me. 
/* One kernel semaphore per entry in BeamformerSharedMemory.locks (the fixed
 * lock kinds followed by one slot per parameter block). */
DEBUG_IMPORT OSW32Semaphore os_w32_shared_memory_semaphores[countof(((BeamformerSharedMemory *)0)->locks)];
#endif

/* Acquire shared-memory lock `lock` (an index into sm->locks), waiting at most
 * timeout_ms. Returns non-zero when the lock was taken. On w32 the kernel
 * semaphore does the actual blocking; sm->locks is then set so other processes
 * can observe the lock state without a syscall (see note on the locks field). */
function b32
beamformer_shared_memory_take_lock(BeamformerSharedMemory *sm, i32 lock, u32 timeout_ms)
{
#if OS_WINDOWS
	b32 result = os_w32_semaphore_wait(os_w32_shared_memory_semaphores[lock], timeout_ms);
	if (result) atomic_store_u32(sm->locks + lock, 1);
#else
	b32 result = take_lock(sm->locks + lock, timeout_ms);
#endif
	return result;
}

/* Release a lock taken with beamformer_shared_memory_take_lock(). The
 * userspace flag is cleared before the w32 semaphore is posted so a woken
 * waiter observes the lock as free. */
function void
beamformer_shared_memory_release_lock(BeamformerSharedMemory *sm, i32 lock)
{
	release_lock(sm->locks + lock);
#if OS_WINDOWS
	os_w32_semaphore_release(os_w32_shared_memory_semaphores[lock], 1);
#endif
}

/* Address of parameter block `block`; blocks are laid out contiguously right
 * after the BeamformerSharedMemory header. The `>=` (rather than `>`) in the
 * assert is deliberate: block == reserved_parameter_blocks yields the
 * one-past-last block, which the scratch arena uses to find its start. */
function BeamformerParameterBlock *
beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block)
{
	assert(sm->reserved_parameter_blocks >= block);
	BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result));
	return result;
}

/* True when any region of parameter block `block` has been marked dirty
 * (see mark_parameter_block_region_dirty()) and not yet consumed. */
function b32
beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block)
{
	b32 result = beamformer_parameter_block(sm, block)->dirty_regions != 0;
	return result;
}

/* Lock parameter block `block` and return a pointer to it; returns 0 on
 * timeout. Per-block locks live after the fixed lock kinds in sm->locks,
 * hence the BeamformerSharedMemoryLockKind_Count offset. */
function BeamformerParameterBlock *
beamformer_parameter_block_lock(BeamformerSharedMemory *sm, u32 block, i32 timeout_ms)
{
	assert(block < BeamformerMaxParameterBlockSlots);
	BeamformerParameterBlock *result = 0;
	if (beamformer_shared_memory_take_lock(sm, BeamformerSharedMemoryLockKind_Count + block, (u32)timeout_ms))
		result = beamformer_parameter_block(sm, block);
	return result;
}

/* Counterpart to beamformer_parameter_block_lock(). */
function void
beamformer_parameter_block_unlock(BeamformerSharedMemory *sm, u32 block)
{
	assert(block < BeamformerMaxParameterBlockSlots);
	beamformer_shared_memory_release_lock(sm, BeamformerSharedMemoryLockKind_Count + block);
}

/* Arena over the scratch region between the reserved parameter blocks
 * (rounded up to a 4KB boundary) and the end of the shared memory segment.
 * NOTE(review): `last` already points one-past-the-last reserved block, so
 * `last + 1` starts the arena a further full block beyond it — confirm the
 * extra sizeof(BeamformerParameterBlock) of slack is intended. */
function Arena
beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm, i64 shared_memory_size)
{
	assert(sm->reserved_parameter_blocks > 0);
	BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks);
	Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + shared_memory_size};
	result.beg = arena_aligned_start(result, KB(4));
	return result;
}

/* Record that `region` of parameter block `block` was modified. Atomic OR so
 * concurrent writers to different regions don't lose each other's bits. */
function void
mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region)
{
	BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block);
	atomic_or_u32(&pb->dirty_regions, 1u << region);
}

/* Wake anyone waiting on `lock` by releasing it, but only if it is actually
 * held (checked in debug builds only; the guard compiles away otherwise). */
function void
post_sync_barrier(BeamformerSharedMemory *sm, BeamformerSharedMemoryLockKind lock)
{
	/* NOTE(rnp): debug: here it is not a bug to release the lock if it
	 * isn't held but elsewhere it is */
	DEBUG_DECL(if (sm->locks[lock])) {
		beamformer_shared_memory_release_lock(sm, lock);
	}
}