beamformer_shared_memory.c (9278B)
1 /* See LICENSE for license details. */ 2 #define BEAMFORMER_SHARED_MEMORY_VERSION (16UL) 3 4 typedef struct BeamformerFrame BeamformerFrame; 5 6 typedef enum { 7 BeamformerWorkKind_Compute, 8 BeamformerWorkKind_ComputeIndirect, 9 BeamformerWorkKind_CreateFilter, 10 BeamformerWorkKind_ReloadShader, 11 BeamformerWorkKind_ExportBuffer, 12 BeamformerWorkKind_UploadBuffer, 13 } BeamformerWorkKind; 14 15 /* TODO(rnp): this is massively bloating the queue; think of some other 16 * way to communicate these to the beamformer */ 17 typedef struct { 18 union { 19 #define X(kind, ...) struct {__VA_ARGS__ ;} kind; 20 BEAMFORMER_FILTER_KIND_LIST(f32, ;) 21 #undef X 22 }; 23 f32 sampling_frequency; 24 b16 complex; 25 } BeamformerFilterParameters; 26 27 typedef struct { 28 BeamformerFilterKind kind; 29 BeamformerFilterParameters parameters; 30 u8 filter_slot; 31 u8 parameter_block; 32 static_assert(BeamformerFilterSlots <= 255, "CreateFilterContext only supports 255 filter slots"); 33 static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks"); 34 } BeamformerCreateFilterContext; 35 36 typedef enum { 37 BeamformerExportKind_BeamformedData, 38 BeamformerExportKind_Stats, 39 } BeamformerExportKind; 40 41 typedef struct { 42 BeamformerExportKind kind; 43 u32 size; 44 } BeamformerExportContext; 45 46 #define BEAMFORMER_SHARED_MEMORY_LOCKS \ 47 X(ScratchSpace) \ 48 X(UploadRF) \ 49 X(ExportSync) \ 50 X(DispatchCompute) 51 52 #define X(name) BeamformerSharedMemoryLockKind_##name, 53 typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind; 54 #undef X 55 56 typedef struct { 57 BeamformerFrame *frame; 58 u32 parameter_block; 59 } BeamformerComputeWorkContext; 60 61 typedef struct { 62 BeamformerViewPlaneTag view_plane; 63 u32 parameter_block; 64 } BeamformerComputeIndirectWorkContext; 65 66 /* NOTE: discriminated union based on type */ 67 typedef struct { 68 BeamformerWorkKind kind; 69 BeamformerSharedMemoryLockKind lock; 70 union { 71 void *generic; 72 BeamformerComputeWorkContext compute_context; 73 BeamformerComputeIndirectWorkContext compute_indirect_context; 74 BeamformerCreateFilterContext create_filter_context; 75 BeamformerExportContext export_context; 76 BeamformerShaderKind reload_shader; 77 }; 78 } BeamformWork; 79 80 typedef struct { 81 union { 82 u64 queue; 83 struct {u32 widx, ridx;}; 84 }; 85 BeamformWork work_items[1 << 6]; 86 } BeamformWorkQueue; 87 88 #define BEAMFORMER_SHARED_MEMORY_SIZE (GB(2)) 89 #define BEAMFORMER_SHARED_MEMORY_MAX_SCRATCH_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE - \ 90 sizeof(BeamformerSharedMemory) - \ 91 sizeof(BeamformerParameterBlock)) 92 93 #define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id), 94 typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags; 95 #undef X 96 97 #define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \ 98 X(ComputePipeline, pipeline) \ 99 X(ChannelMapping, channel_mapping) \ 100 X(FocalVectors, focal_vectors) \ 101 X(Parameters, parameters) \ 102 X(SparseElements, sparse_elements) \ 103 X(TransmitReceiveOrientations, transmit_receive_orientations) 104 105 typedef enum { 106 #define X(k, ...) BeamformerParameterBlockRegion_##k, 107 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 108 #undef X 109 BeamformerParameterBlockRegion_Count 110 } BeamformerParameterBlockRegions; 111 112 typedef union { 113 u8 filter_slot; 114 } BeamformerShaderParameters; 115 116 typedef struct { 117 BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages]; 118 BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages]; 119 u32 shader_count; 120 BeamformerDataKind data_kind; 121 } BeamformerComputePipeline; 122 123 typedef struct { 124 alignas(16) union { 125 BeamformerParameters parameters; 126 struct { 127 BeamformerParametersHead parameters_head; 128 BeamformerUIParameters parameters_ui; 129 }; 130 }; 131 132 /* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */ 133 u32 dirty_regions; 134 static_assert(BeamformerParameterBlockRegion_Count <= 32, "only 32 parameter block regions supported"); 135 136 BeamformerComputePipeline pipeline; 137 138 alignas(16) i16 channel_mapping[BeamformerMaxChannelCount]; 139 alignas(16) i16 sparse_elements[BeamformerMaxChannelCount]; 140 alignas(16) u8 transmit_receive_orientations[BeamformerMaxChannelCount]; 141 /* NOTE(rnp): interleaved transmit angle, focal depth pairs */ 142 alignas(16) v2 focal_vectors[BeamformerMaxChannelCount]; 143 } BeamformerParameterBlock; 144 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0, 145 "sizeof(BeamformerParametersBlock) must be a multiple of its alignment"); 146 147 #define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field), 148 read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = { 149 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 150 }; 151 #undef X 152 153 typedef struct { 154 u32 version; 155 156 /* NOTE(rnp): causes future library calls to fail. 157 * see note in beamformer_invalidate_shared_memory() */ 158 b32 invalid; 159 160 /* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of 161 * the lock without leaving userspace. */ 162 i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlockSlots]; 163 164 /* NOTE(rnp): total number of parameter block regions the client has requested. 165 * used to calculate offset to scratch space and to track number of allocated 166 * semaphores on w32. Defaults to 1 but can be changed at runtime */ 167 u32 reserved_parameter_blocks; 168 169 /* TODO(rnp): this is really sucky. we need a better way to communicate this */ 170 u32 scratch_rf_size; 171 172 BeamformerLiveImagingParameters live_imaging_parameters; 173 BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags; 174 175 BeamformWorkQueue external_work_queue; 176 } BeamformerSharedMemory; 177 178 function BeamformWork * 179 beamform_work_queue_pop(BeamformWorkQueue *q) 180 { 181 BeamformWork *result = 0; 182 183 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 184 u64 val = atomic_load_u64(&q->queue); 185 u64 mask = countof(q->work_items) - 1; 186 u64 widx = val & mask; 187 u64 ridx = val >> 32 & mask; 188 189 if (ridx != widx) 190 result = q->work_items + ridx; 191 192 return result; 193 } 194 195 function void 196 beamform_work_queue_pop_commit(BeamformWorkQueue *q) 197 { 198 atomic_add_u64(&q->queue, 0x100000000ULL); 199 } 200 201 function BeamformWork * 202 beamform_work_queue_push(BeamformWorkQueue *q) 203 { 204 BeamformWork *result = 0; 205 206 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 207 u64 val = atomic_load_u64(&q->queue); 208 u64 mask = countof(q->work_items) - 1; 209 u64 widx = val & mask; 210 u64 ridx = val >> 32 & mask; 211 u64 next = (widx + 1) & mask; 212 213 if (val & 0x80000000) 214 atomic_and_u64(&q->queue, ~0x80000000); 215 216 if (next != ridx) { 217 result = q->work_items + widx; 218 zero_struct(result); 219 } 220 221 return result; 222 } 223 224 function void 225 beamform_work_queue_push_commit(BeamformWorkQueue *q) 226 { 227 atomic_add_u64(&q->queue, 1); 228 } 229 230 function BeamformerParameterBlock * 231 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block) 232 { 233 assert(sm->reserved_parameter_blocks >= block); 234 BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result)); 235 return result; 236 } 237 238 function b32 239 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block) 240 { 241 b32 result = beamformer_parameter_block(sm, block)->dirty_regions != 0; 242 return result; 243 } 244 245 function BeamformerParameterBlock * 246 beamformer_parameter_block_lock(SharedMemoryRegion *sm, u32 block, i32 timeout_ms) 247 { 248 assert(block < BeamformerMaxParameterBlockSlots); 249 BeamformerSharedMemory *b = sm->region; 250 BeamformerParameterBlock *result = 0; 251 if (os_shared_memory_region_lock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block, (u32)timeout_ms)) 252 result = beamformer_parameter_block(sm->region, block); 253 return result; 254 } 255 256 function void 257 beamformer_parameter_block_unlock(SharedMemoryRegion *sm, u32 block) 258 { 259 assert(block < BeamformerMaxParameterBlockSlots); 260 BeamformerSharedMemory *b = sm->region; 261 os_shared_memory_region_unlock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block); 262 } 263 264 function Arena 265 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm) 266 { 267 assert(sm->reserved_parameter_blocks > 0); 268 BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks); 269 Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + BEAMFORMER_SHARED_MEMORY_SIZE}; 270 result.beg = arena_aligned_start(result, KB(4)); 271 return result; 272 } 273 274 function void 275 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region) 276 { 277 BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block); 278 atomic_or_u32(&pb->dirty_regions, 1u << region); 279 } 280 281 function void 282 post_sync_barrier(SharedMemoryRegion *sm, BeamformerSharedMemoryLockKind lock, i32 *locks) 283 { 284 /* NOTE(rnp): debug: here it is not a bug to release the lock if it 285 * isn't held but elswhere it is */ 286 DEBUG_DECL(if (locks[lock])) { 287 os_shared_memory_region_unlock(sm, locks, (i32)lock); 288 } 289 }