beamformer_shared_memory.c
/* See LICENSE for license details. */
#define BEAMFORMER_SHARED_MEMORY_VERSION (12UL)

typedef struct BeamformerFrame     BeamformerFrame;
typedef struct ShaderReloadContext ShaderReloadContext;

typedef enum {
	BeamformerWorkKind_Compute,
	BeamformerWorkKind_ComputeIndirect,
	BeamformerWorkKind_CreateFilter,
	BeamformerWorkKind_ReloadShader,
	BeamformerWorkKind_ExportBuffer,
	BeamformerWorkKind_UploadBuffer,
} BeamformerWorkKind;

typedef struct {
	union {
		struct {f32 beta; f32 cutoff_frequency;};
		f32 xdc_center_frequency;
	};
	f32 sampling_frequency;
	i16 length;
} BeamformerFilterParameters;

typedef struct {
	BeamformerFilterKind       kind;
	BeamformerFilterParameters parameters;
	u8 filter_slot;
	u8 parameter_block;
	static_assert(BeamformerFilterSlots <= 255, "CreateFilterContext only supports 255 filter slots");
	static_assert(BeamformerMaxParameterBlockSlots <= 255, "CreateFilterContext only supports 255 parameter blocks");
} BeamformerCreateFilterContext;

typedef enum {
	BeamformerExportKind_BeamformedData,
	BeamformerExportKind_Stats,
} BeamformerExportKind;

typedef struct {
	BeamformerExportKind kind;
	u32 size;
} BeamformerExportContext;

/* TODO(rnp): remove the None lock */
#define BEAMFORMER_SHARED_MEMORY_LOCKS \
	X(None)            \
	X(ScratchSpace)    \
	X(UploadRF)        \
	X(ExportSync)      \
	X(DispatchCompute)

#define X(name) BeamformerSharedMemoryLockKind_##name,
typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind;
#undef X

typedef struct {
	BeamformerFrame *frame;
	u32 parameter_block;
} BeamformerComputeWorkContext;

typedef struct {
	BeamformerViewPlaneTag view_plane;
	u32 parameter_block;
} BeamformerComputeIndirectWorkContext;

/* NOTE: discriminated union based on type */
typedef struct {
	BeamformerWorkKind kind;
	BeamformerSharedMemoryLockKind lock;
	union {
		void *generic;
		BeamformerComputeWorkContext         compute_context;
		BeamformerComputeIndirectWorkContext compute_indirect_context;
		BeamformerCreateFilterContext        create_filter_context;
		BeamformerExportContext              export_context;
		ShaderReloadContext                 *shader_reload_context;
	};
} BeamformWork;

typedef struct {
	union {
		u64 queue;
		struct {u32 widx, ridx;};
	};
	BeamformWork work_items[1 << 6];
} BeamformWorkQueue;
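
/* NOTE: usage sketch: `kind` selects which member of the union in BeamformWork is
 * meaningful, so a producer fills only the context that matches the work kind.
 * `frame` below is a placeholder for a frame obtained elsewhere.
 *
 *   BeamformWork work = {0};
 *   work.kind                            = BeamformerWorkKind_Compute;
 *   work.compute_context.frame           = frame;
 *   work.compute_context.parameter_block = 0;
 */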

#define BEAMFORMER_SHARED_MEMORY_SIZE (GB(2))
#define BEAMFORMER_SHARED_MEMORY_MIN_SCRATCH_SIZE (BEAMFORMER_SHARED_MEMORY_SIZE -     \
                                                   sizeof(BeamformerSharedMemory) -    \
                                                   sizeof(BeamformerParameterBlock))

#define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id),
typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags;
#undef X

#define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \
	X(ComputePipeline, pipeline)         \
	X(ChannelMapping,  channel_mapping)  \
	X(FocalVectors,    focal_vectors)    \
	X(Parameters,      parameters)       \
	X(SparseElements,  sparse_elements)

typedef enum {
	#define X(k, ...) BeamformerParameterBlockRegion_##k,
	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
	#undef X
	BeamformerParameterBlockRegion_Count
} BeamformerParameterBlockRegions;

typedef union {
	u8 filter_slot;
} BeamformerShaderParameters;

typedef struct {
	BeamformerShaderKind       shaders[BeamformerMaxComputeShaderStages];
	BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages];
	u32 shader_count;
	BeamformerDataKind data_kind;
} BeamformerComputePipeline;

typedef struct {
	alignas(16) union {
		BeamformerParameters parameters;
		struct {
			BeamformerParametersHead parameters_head;
			BeamformerUIParameters   parameters_ui;
			BeamformerParametersTail parameters_tail;
		};
	};

	/* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */
	u32 dirty_regions;
	static_assert(BeamformerParameterBlockRegion_Count <= 32, "only 32 parameter block regions supported");

	BeamformerComputePipeline pipeline;

	alignas(16) i16 channel_mapping[BeamformerMaxChannelCount];
	alignas(16) i16 sparse_elements[BeamformerMaxChannelCount];
	/* NOTE(rnp): interleaved transmit angle, focal depth pairs */
	alignas(16) v2  focal_vectors[BeamformerMaxChannelCount];
} BeamformerParameterBlock;
static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0,
              "sizeof(BeamformerParameterBlock) must be a multiple of its alignment");

#define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field),
read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = {
	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
};
#undef X
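
/* NOTE: usage sketch: a consumer that sees a bit set in `dirty_regions` can locate the
 * matching sub-region of the block through the offset table above. What is done with
 * `region_data` (e.g. a GPU upload) is outside this file and shown only as a comment.
 *
 *   if (pb->dirty_regions & (1u << BeamformerParameterBlockRegion_FocalVectors)) {
 *       u8 *region_data = (u8 *)pb + BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_FocalVectors];
 *       // ... consume region_data, then mark_parameter_block_region_clean() ...
 *   }
 */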

typedef struct {
	u32 version;

	/* NOTE(rnp): causes future library calls to fail.
	 * see note in beamformer_invalidate_shared_memory() */
	b32 invalid;

	/* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of
	 * the lock without leaving userspace. */
	i32 locks[BeamformerSharedMemoryLockKind_Count + BeamformerMaxParameterBlockSlots];

	/* NOTE(rnp): total number of parameter blocks the client has requested.
	 * used to calculate the offset to scratch space and to track the number of allocated
	 * semaphores on w32. Defaults to 1 but can be changed at runtime */
	u32 reserved_parameter_blocks;

	/* TODO(rnp): this is really sucky. we need a better way to communicate this */
	u32 scratch_rf_size;

	BeamformerLiveImagingParameters live_imaging_parameters;
	BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags;

	BeamformWorkQueue external_work_queue;
} BeamformerSharedMemory;

function BeamformWork *
beamform_work_queue_pop(BeamformWorkQueue *q)
{
	BeamformWork *result = 0;

	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
	u64 val  = atomic_load_u64(&q->queue);
	u64 mask = countof(q->work_items) - 1;
	u64 widx = val & mask;
	u64 ridx = val >> 32 & mask;

	if (ridx != widx)
		result = q->work_items + ridx;

	return result;
}

function void
beamform_work_queue_pop_commit(BeamformWorkQueue *q)
{
	atomic_add_u64(&q->queue, 0x100000000ULL);
}

function BeamformWork *
beamform_work_queue_push(BeamformWorkQueue *q)
{
	BeamformWork *result = 0;

	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
	u64 val  = atomic_load_u64(&q->queue);
	u64 mask = countof(q->work_items) - 1;
	u64 widx = val & mask;
	u64 ridx = val >> 32 & mask;
	u64 next = (widx + 1) & mask;

	if (val & 0x80000000)
		atomic_and_u64(&q->queue, ~0x80000000);

	if (next != ridx) {
		result = q->work_items + widx;
		zero_struct(result);
	}

	return result;
}

function void
beamform_work_queue_push_commit(BeamformWorkQueue *q)
{
	atomic_add_u64(&q->queue, 1);
}
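
/* NOTE: usage sketch: the write index lives in the low 32 bits of `queue` and the read
 * index in the high 32 bits; push()/pop() only reserve or peek an item, and the matching
 * *_commit() publishes it to the other side. The lack of compare-and-swap suggests a
 * single-producer/single-consumer ring. `q` is a placeholder for &sm->external_work_queue.
 *
 *   // producer
 *   BeamformWork *work = beamform_work_queue_push(q);
 *   if (work) {
 *       work->kind = BeamformerWorkKind_UploadBuffer;
 *       beamform_work_queue_push_commit(q);
 *   }
 *
 *   // consumer
 *   BeamformWork *todo = beamform_work_queue_pop(q);
 *   if (todo) {
 *       // ... process *todo ...
 *       beamform_work_queue_pop_commit(q);
 *   }
 */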

function BeamformerParameterBlock *
beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block)
{
	assert(sm->reserved_parameter_blocks >= block);
	BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result));
	return result;
}

function BeamformerParameterBlock *
beamformer_parameter_block_lock(SharedMemoryRegion *sm, u32 block, i32 timeout_ms)
{
	assert(block < BeamformerMaxParameterBlockSlots);
	BeamformerSharedMemory *b = sm->region;
	BeamformerParameterBlock *result = 0;
	if (os_shared_memory_region_lock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block, (u32)timeout_ms))
		result = beamformer_parameter_block(sm->region, block);
	return result;
}

function void
beamformer_parameter_block_unlock(SharedMemoryRegion *sm, u32 block)
{
	assert(block < BeamformerMaxParameterBlockSlots);
	BeamformerSharedMemory *b = sm->region;
	os_shared_memory_region_unlock(sm, b->locks, BeamformerSharedMemoryLockKind_Count + (i32)block);
}

function Arena
beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm)
{
	assert(sm->reserved_parameter_blocks > 0);
	BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks);
	Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + BEAMFORMER_SHARED_MEMORY_SIZE};
	result.beg = arena_aligned_start(result, KB(4));
	return result;
}

function void
mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region)
{
	BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block);
	atomic_or_u32(&pb->dirty_regions, 1 << region);
}

function void
mark_parameter_block_region_clean(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region)
{
	BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block);
	atomic_and_u32(&pb->dirty_regions, ~(1 << region));
}

function void
post_sync_barrier(SharedMemoryRegion *sm, BeamformerSharedMemoryLockKind lock, i32 *locks)
{
	/* NOTE(rnp): debug: here it is not a bug to release the lock if it
	 * isn't held but elsewhere it is */
	DEBUG_DECL(if (locks[lock])) {
		os_shared_memory_region_unlock(sm, locks, (i32)lock);
	}
}
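
/* NOTE: usage sketch: a client-side update of a parameter block under its per-block
 * lock. `shm`, `new_focal_vectors`, and the 100 ms timeout are placeholders.
 *
 *   BeamformerParameterBlock *pb = beamformer_parameter_block_lock(shm, 0, 100);
 *   if (pb) {
 *       memcpy(pb->focal_vectors, new_focal_vectors, sizeof(pb->focal_vectors));
 *       mark_parameter_block_region_dirty(shm->region, 0, BeamformerParameterBlockRegion_FocalVectors);
 *       beamformer_parameter_block_unlock(shm, 0);
 *   }
 */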