vulkan.c (101975B)
1 /* See LICENSE for license details. */ 2 // TODO(rnp) 3 // [ ]: what is needed for HDR? I think it makes sense to just default to it nowadays 4 // [ ]: once opengl is removed switch images to SRGB and/or 16 bit Float 5 // [ ]: VK_KHR_robustness2 probably shouldn't be required but it also might not matter 6 7 #include "beamformer_internal.h" 8 #include "vulkan.h" 9 #include "external/glslang/glslang/Include/glslang_c_interface.h" 10 11 #define ForceSingleQueue (0) 12 13 #define glslang_info(s) s8("[glslang] " s) 14 #define vulkan_info(s) s8("[vulkan] " s) 15 16 #define ValidVulkanHandle(h) ((h).value[0] != 0) 17 18 #define MaxCommandBuffersInFlight BeamformerMaxRawDataFramesInFlight 19 #define MaxCommandBufferTimestamps (1024) 20 21 typedef enum { 22 VulkanQueueKind_Graphics, 23 VulkanQueueKind_Compute, 24 VulkanQueueKind_Transfer, 25 VulkanQueueKind_Count, 26 } VulkanQueueKind; 27 28 typedef enum { 29 VulkanMemoryKind_Device, 30 VulkanMemoryKind_BAR, 31 VulkanMemoryKind_Host, 32 VulkanMemoryKind_Count, 33 } VulkanMemoryKind; 34 35 typedef struct { 36 VkDeviceMemory memory; 37 VkBuffer buffer; 38 u64 memory_size; 39 40 void * host_pointer; 41 42 VulkanMemoryKind memory_kind; 43 44 // NOTE: only used when the buffer is backing a VulkanRenderModel. 45 VkIndexType index_type; 46 } VulkanBuffer; 47 48 typedef struct { 49 VkDeviceMemory memory; 50 VkImage image; 51 VkImageView view; 52 } VulkanImage; 53 54 typedef struct { 55 VkPipeline pipeline; 56 VkPipelineLayout layout; 57 VkShaderStageFlags stage_flags; 58 } VulkanPipeline; 59 60 typedef struct { 61 VkSemaphore semaphore; 62 u64 value; 63 } VulkanSemaphore; 64 65 typedef struct { 66 VulkanTimeline timeline; 67 u32 buffer_index; 68 69 // NOTE(rnp): since there may not be QueueKind_Count queues, when putting values into this 70 // array you must be careful to map through the queue_indices array in the vulkan_context. 
71 u64 in_flight_wait_values[VulkanQueueKind_Count]; 72 } VulkanCommandBuffer; 73 74 typedef enum { 75 VulkanEntityKind_Buffer, 76 VulkanEntityKind_CommandBuffer, 77 VulkanEntityKind_Image, 78 VulkanEntityKind_Pipeline, 79 VulkanEntityKind_RenderModel, 80 VulkanEntityKind_Semaphore, 81 } VulkanEntityKind; 82 83 typedef struct VulkanEntity VulkanEntity; 84 struct VulkanEntity { 85 VulkanEntity * next; 86 VulkanEntityKind kind; 87 union { 88 VulkanBuffer buffer; 89 VulkanCommandBuffer command_buffer; 90 VulkanImage image; 91 VulkanPipeline pipeline; 92 VulkanSemaphore semaphore; 93 } as; 94 }; 95 96 typedef alignas(64) struct { 97 i32 lock; 98 99 u16 queue_family; 100 u16 queue_index; 101 VkQueue queue; 102 103 VulkanSemaphore timeline_semaphore; 104 105 VkPipelineStageFlags2 pipeline_stage_flags; 106 } VulkanQueue; 107 static_assert(alignof(VulkanQueue) == 64, "VulkanQueue must be placed on its own cacheline"); 108 109 typedef alignas(64) struct { 110 i32 lock; 111 u32 next_index; 112 113 VulkanPipeline *bound_pipeline; 114 115 VkCommandPool handle; 116 VkQueryPool query_pool; 117 VkCommandBuffer buffers[MaxCommandBuffersInFlight]; 118 119 u64 submission_values[MaxCommandBuffersInFlight]; 120 u32 queries_occupied[MaxCommandBuffersInFlight]; 121 } VulkanCommandPool; 122 123 typedef struct { 124 Arena arena; 125 i32 arena_lock; 126 127 VkInstance handle; 128 VkDevice device; 129 VkPhysicalDevice physical_device; 130 131 VkDescriptorPool descriptor_pool; 132 VkDescriptorSetLayout descriptor_set_layouts[BeamformerShaderResourceKind_Count]; 133 VkDescriptorSet descriptor_sets[BeamformerShaderResourceKind_Count]; 134 // NOTE(rnp): must store these if we want to allow partial updates easily 135 VkDescriptorBufferInfo descriptor_buffer_infos[BeamformerShaderBufferSlot_Count]; 136 137 // NOTE(rnp): fallback for when a shader fails to compile 138 VulkanPipeline default_compute_pipeline; 139 VulkanPipeline default_graphics_pipeline; 140 141 GPUInfo gpu_info; 142 143 struct { 
144 u64 max_allocation_size; 145 u64 non_coherent_atom_size; 146 u8 gpu_heap_index; 147 i8 memory_type_indices[VulkanMemoryKind_Count]; 148 b8 memory_host_coherent[VulkanMemoryKind_Count]; 149 static_assert(VK_MAX_MEMORY_HEAPS < I8_MAX, ""); 150 static_assert(VK_MAX_MEMORY_TYPES < U8_MAX, ""); 151 } memory_info; 152 153 VulkanCommandPool * command_pools[VulkanTimeline_Count]; 154 VulkanQueue * queues[VulkanQueueKind_Count]; 155 // NOTE(rnp): there are a few places in the code where simply going through the queues map 156 // is not sufficient. those places need to know of the unique queues which unique queue 157 // is being referred to. that code uses this map instead. 158 u16 queue_indices[VulkanQueueKind_Count]; 159 u16 unique_queues; 160 161 VkFormat swap_chain_image_format; 162 VkFormat depth_stencil_format; 163 164 VulkanEntity * entity_freelist; 165 Arena entity_arena; 166 i32 entity_lock; 167 } VulkanContext; 168 169 read_only global const char *vk_required_instance_extensions[] = { 170 }; 171 172 #if OS_WINDOWS 173 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \ 174 X("VK_KHR_external_memory_win32") \ 175 X("VK_KHR_external_semaphore_win32") \ 176 177 #else 178 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \ 179 X("VK_KHR_external_memory_fd") \ 180 X("VK_KHR_external_semaphore_fd") \ 181 182 #endif 183 184 #define VK_REQUIRED_DEVICE_EXTENSIONS_LIST \ 185 X("VK_KHR_16bit_storage") \ 186 X("VK_KHR_external_memory") \ 187 X("VK_KHR_external_semaphore") \ 188 X("VK_KHR_robustness2") \ 189 X("VK_KHR_storage_buffer_storage_class") \ 190 X("VK_KHR_timeline_semaphore") \ 191 VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST 192 193 #define X(str) s8_comp(str), 194 read_only global s8 vk_required_device_extensions[] = {VK_REQUIRED_DEVICE_EXTENSIONS_LIST}; 195 #undef X 196 197 #define VK_OPTIONAL_DEVICE_EXTENSIONS_LIST \ 198 X(VK_KHR, cooperative_matrix) \ 199 200 #define X(p, s, ...) 
s8_comp(#p "_" #s),
read_only global s8 vk_optional_device_extensions[] = {VK_OPTIONAL_DEVICE_EXTENSIONS_LIST};
#undef X

/* NOTE: the VK_REQUIRED_PHYSICAL_*_FEATURES lists below name one feature member per X()
 * entry; they are expanded elsewhere in the file (not visible from this section). */
#define VK_REQUIRED_PHYSICAL_FEATURES \
	X(shaderInt16) \
	X(shaderInt64) \

#define VK_REQUIRED_PHYSICAL_11_FEATURES \
	X(storageBuffer16BitAccess) \

#define VK_REQUIRED_PHYSICAL_12_FEATURES \
	X(bufferDeviceAddress) \
	X(shaderFloat16) \
	X(timelineSemaphore) \
	X(vulkanMemoryModel) \

#define VK_REQUIRED_PHYSICAL_13_FEATURES \
	X(dynamicRendering) \
	X(synchronization2) \

/* NOTE: X(prefix, name) lists; each entry expands both to the extension string
 * "<prefix>_<name>" and to a flag called <name> inside vulkan_config. */
#define VK_DEBUG_EXTENSIONS \
	X(VK_KHR, shader_non_semantic_info) \
	X(VK_KHR, shader_relaxed_extended_instruction) \

#define X(p, s, ...) s8_comp(#p "_" #s),
read_only global s8 vk_debug_extensions[] = {VK_DEBUG_EXTENSIONS};
#undef X

#define VK_INSTANCE_DEBUG_EXTENSIONS_LIST \
	X(VK_EXT, debug_utils) \

#define X(p, s, ...) s8_comp(#p "_" #s),
read_only global s8 vk_instance_debug_extensions[] = {VK_INSTANCE_DEBUG_EXTENSIONS_LIST};
#undef X

/* NOTE: validation layers are only listed in debug builds; in release builds the
 * list (and therefore vk_validation_layers) is empty. */
#if BEAMFORMER_DEBUG
  #define VK_VALIDATION_LAYERS_LIST \
	X(KHRONOS, validation) \

#else
  #define VK_VALIDATION_LAYERS_LIST
#endif

read_only global str8 vk_validation_layers[] = {
	#define X(vendor, name, ...) str8_comp("VK_LAYER_" #vendor "_" #name),
	VK_VALIDATION_LAYERS_LIST
	#undef X
};

/* NOTE: runtime configuration flags. Every union overlays one named member per list
 * entry with the array E, so a flag can be read by name (e.g. instance.debug_utils)
 * or by the index of the matching entry in the corresponding extension/layer table. */
global struct {
	u32 driver_api_version;
	// NOTE: one flag per entry of vk_optional_device_extensions
	union {
		struct {
			#define X(_, name, ...) b8 name;
			VK_OPTIONAL_DEVICE_EXTENSIONS_LIST
			#undef X
		};
		b8 E[countof(vk_optional_device_extensions)];
	} optional;

	// NOTE: one flag per entry of vk_debug_extensions
	union {
		struct {
			#define X(_, name, ...) b8 name;
			VK_DEBUG_EXTENSIONS
			#undef X
		};
		b8 E[countof(vk_debug_extensions)];
	} debug;

	// NOTE: one flag per entry of vk_instance_debug_extensions
	union {
		struct {
			#define X(_, name, ...) b8 name;
			VK_INSTANCE_DEBUG_EXTENSIONS_LIST
			#undef X
		};
		b8 E[countof(vk_instance_debug_extensions)];
	} instance;

	#if BEAMFORMER_DEBUG
	// NOTE: filled by vk_load_instance for every requested layer that the loader reports
	struct {
		union {
			struct {
				#define X(_, name, ...) b8 name;
				VK_VALIDATION_LAYERS_LIST
				#undef X
			};
			b8 E[countof(vk_validation_layers)];
		} enabled;

		union {
			struct {
				#define X(_, name, ...) u32 name;
				VK_VALIDATION_LAYERS_LIST
				#undef X
			};
			u32 E[countof(vk_validation_layers)];
		} version;
	} layers;
	#endif
} vulkan_config;

/* NOTE: upper bound on the number of device extensions that can be enabled at once:
 * every required, optional, and debug device extension. */
#define MAX_ENABLED_EXTENSIONS (  countof(vk_required_device_extensions) \
                                + countof(vk_optional_device_extensions) \
                                + countof(vk_debug_extensions)           \
                               )

/* NOTE: single global context; declared as a 1 element array so it is used with `->` */
global VulkanContext vulkan_context[1];

/* NOTE(rnp): the idea here is to set reasonable development constraints.
 * They should probably not match one to one with the maximums of the dev
 * machine's hardware. Instead these are here to cause compile time failure
 * for features which are not expected to work everywhere.
 */
global glslang_resource_t glslc_resource_constraints[1] = {{
	.max_compute_work_group_count_x = 65535,
	.max_compute_work_group_count_y = 65535,
	.max_compute_work_group_count_z = 65535,
	.max_compute_work_group_size_x  = 1024,
	.max_compute_work_group_size_y  = 1024,
	.max_compute_work_group_size_z  = 1024,

	// NOTE: taken from glslang defaults
	.max_lights = 32,
	.max_clip_planes = 6,
	.max_texture_units = 32,
	.max_texture_coords = 32,
	.max_vertex_attribs = 64,
	.max_vertex_uniform_components = 4096,
	.max_varying_floats = 64,
	.max_vertex_texture_image_units = 32,
	.max_combined_texture_image_units = 80,
	.max_texture_image_units = 32,
	.max_fragment_uniform_components = 4096,
	.max_draw_buffers = 32,
	.max_vertex_uniform_vectors = 128,
	.max_varying_vectors = 8,
	.max_fragment_uniform_vectors = 16,
	.max_vertex_output_vectors = 16,
	.max_fragment_input_vectors = 15,
	.min_program_texel_offset = -8,
	.max_program_texel_offset = 7,
	.max_clip_distances = 8,
	.max_compute_uniform_components = 1024,
	.max_compute_texture_image_units = 16,
	.max_compute_image_uniforms = 8,
	.max_compute_atomic_counters = 8,
	.max_compute_atomic_counter_buffers = 1,
	.max_varying_components = 60,
	.max_vertex_output_components = 64,
	.max_fragment_input_components = 128,
	.max_image_units = 8,
	.max_combined_image_units_and_fragment_outputs = 8,
	.max_combined_shader_output_resources = 8,
	.max_image_samples = 0,
	.max_vertex_image_uniforms = 0,
	.max_fragment_image_uniforms = 8,
	.max_combined_image_uniforms = 8,
	.max_viewports = 16,
	.max_vertex_atomic_counters = 0,
	.max_fragment_atomic_counters = 8,
	.max_combined_atomic_counters = 8,
	.max_atomic_counter_bindings = 1,
	.max_vertex_atomic_counter_buffers = 0,
	.max_fragment_atomic_counter_buffers = 1,
	.max_combined_atomic_counter_buffers = 1,
.max_atomic_counter_buffer_size = 16384, 366 .max_transform_feedback_buffers = 4, 367 .max_transform_feedback_interleaved_components = 64, 368 .max_cull_distances = 8, 369 .max_combined_clip_and_cull_distances = 8, 370 .max_samples = 4, 371 .max_mesh_output_vertices_ext = 256, 372 .max_mesh_output_primitives_ext = 256, 373 .max_mesh_work_group_size_x_ext = 128, 374 .max_mesh_work_group_size_y_ext = 128, 375 .max_mesh_work_group_size_z_ext = 128, 376 .max_task_work_group_size_x_ext = 128, 377 .max_task_work_group_size_y_ext = 128, 378 .max_task_work_group_size_z_ext = 128, 379 .max_mesh_view_count_ext = 4, 380 .max_dual_source_draw_buffers_ext = 1, 381 382 .limits = { 383 .non_inductive_for_loops = 1, 384 .while_loops = 1, 385 .do_while_loops = 1, 386 .general_uniform_indexing = 1, 387 .general_attribute_matrix_vector_indexing = 1, 388 .general_varying_indexing = 1, 389 .general_sampler_indexing = 1, 390 .general_variable_indexing = 1, 391 .general_constant_matrix_vector_indexing = 1, 392 }, 393 }}; 394 395 #if BEAMFORMER_RENDERDOC_HOOKS 396 DEBUG_IMPORT void * 397 vk_renderdoc_instance_handle(void) 398 { 399 return *((void **)vulkan_context->handle); 400 } 401 #endif 402 403 #if BEAMFORMER_DEBUG 404 #define vk_label_object(k, h, label, extra) vk_label_object_(VK_OBJECT_TYPE_##k, (u64)h, label, extra) 405 function void 406 vk_label_object_(VkObjectType kind, u64 handle, s8 label, s8 extra) 407 { 408 local_persist u8 buffer[1024]; 409 Stream sb = arena_stream(arena_from_memory(buffer, sizeof(buffer))); 410 if (vulkan_config.instance.debug_utils && label.len > 0) { 411 stream_append_s8s(&sb, label, s8(" ("), extra, s8(")")); 412 stream_append_byte(&sb, 0); 413 if (!sb.errors) { 414 VkDebugUtilsObjectNameInfoEXT object_name_info = { 415 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, 416 .objectType = kind, 417 .objectHandle = handle, 418 .pObjectName = (char *)sb.data, 419 }; 420 vkSetDebugUtilsObjectNameEXT(vulkan_context->device, &object_name_info); 421 
} 422 } 423 } 424 #else 425 #define vk_label_object(...) 426 #define vk_label_object_(...) 427 #endif 428 429 function VulkanEntity * 430 vk_entity_allocate(VulkanEntityKind kind) 431 { 432 VulkanEntity *result = 0; 433 DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock)) 434 { 435 result = SLLPopFreelist(vulkan_context->entity_freelist); 436 if (!result) result = push_array_no_zero(&vulkan_context->entity_arena, VulkanEntity, 1); 437 } 438 439 zero_struct(result); 440 result->kind = kind; 441 return result; 442 } 443 444 function void 445 vk_entity_release(VulkanEntity *entity) 446 { 447 DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock)) 448 { 449 SLLStackPush(vulkan_context->entity_freelist, entity, next); 450 } 451 } 452 453 function void * 454 vk_entity_data(VulkanHandle h, VulkanEntityKind kind) 455 { 456 VulkanEntity *e = (VulkanEntity *)h.value[0]; 457 assert(ValidVulkanHandle(h) && e->kind == kind); 458 return &e->as; 459 } 460 461 function VkCommandBuffer 462 vk_command_buffer(VulkanHandle h) 463 { 464 VulkanCommandBuffer *vcb = vk_entity_data(h, VulkanEntityKind_CommandBuffer); 465 VulkanCommandPool *vcp = vulkan_context->command_pools[vcb->timeline]; 466 VkCommandBuffer result = vcp->buffers[vcb->buffer_index]; 467 return result; 468 } 469 470 #define glslang_log(a, ...) 
glslang_log_(a, arg_list(s8, __VA_ARGS__)) 471 function void 472 glslang_log_(Arena arena, s8 *items, uz count) 473 { 474 Stream sb = arena_stream(arena); 475 stream_append_s8(&sb, glslang_info("")); 476 stream_append_s8s_(&sb, items, count); 477 if (sb.data[sb.widx - 1] != '\n') stream_append_byte(&sb, '\n'); 478 os_console_log(sb.data, sb.widx); 479 } 480 481 function s8 482 glsl_to_spirv(Arena *arena, u32 kind, s8 shader_text, s8 name) 483 { 484 /* NOTE(rnp): glslang's garbage c interface doesn't expose internal usage of strings with length */ 485 assert(shader_text.data[shader_text.len] == 0); 486 487 glslang_input_t input = { 488 .language = GLSLANG_SOURCE_GLSL, 489 .stage = kind, 490 .client = GLSLANG_CLIENT_VULKAN, 491 .client_version = GLSLANG_TARGET_VULKAN_1_4, 492 .target_language = GLSLANG_TARGET_SPV, 493 .target_language_version = GLSLANG_TARGET_SPV_1_6, 494 .code = (c8 *)shader_text.data, 495 .default_version = 460, 496 .default_profile = GLSLANG_NO_PROFILE, 497 .force_default_version_and_profile = 0, 498 .forward_compatible = 0, 499 .messages = GLSLANG_MSG_DEFAULT_BIT, 500 .resource = glslc_resource_constraints, 501 }; 502 glslang_shader_t *shader = glslang_shader_create(&input); 503 504 s8 error = {0}; 505 if (glslang_shader_preprocess(shader, &input)) { 506 if (!glslang_shader_parse(shader, &input)) 507 error = s8("parsing failed"); 508 } else { 509 error = s8("preprocessing failed"); 510 } 511 512 if (error.len) { 513 glslang_log(*arena, name, s8(": "), error, s8("\n"), 514 c_str_to_s8((c8 *)glslang_shader_get_info_log(shader)), 515 c_str_to_s8((c8 *)glslang_shader_get_info_debug_log(shader))); 516 glslang_shader_delete(shader); 517 shader = 0; 518 } 519 520 s8 result = {0}; 521 if (shader) { 522 glslang_program_t *program = glslang_program_create(); 523 glslang_program_add_shader(program, shader); 524 i32 messages = GLSLANG_MSG_DEBUG_INFO_BIT|GLSLANG_MSG_SPV_RULES_BIT|GLSLANG_MSG_VULKAN_RULES_BIT; 525 if (glslang_program_link(program, messages)) { 
526 glslang_spv_options_t options = {.validate = 1,}; 527 528 if (vulkan_config.debug.shader_non_semantic_info && 529 vulkan_config.debug.shader_relaxed_extended_instruction) 530 { 531 options.generate_debug_info = 1; 532 options.emit_nonsemantic_shader_debug_info = 1; 533 options.emit_nonsemantic_shader_debug_source = 1; 534 } 535 536 glslang_program_add_source_text(program, kind, (c8 *)shader_text.data, shader_text.len); 537 glslang_program_SPIRV_generate_with_options(program, kind, &options); 538 539 u32 words = glslang_program_SPIRV_get_size(program); 540 result.data = (u8 *)push_array(arena, u32, words); 541 result.len = words * sizeof(u32); 542 glslang_program_SPIRV_get(program, (u32 *)result.data); 543 544 s8 spirv_msg = c_str_to_s8((c8 *)glslang_program_SPIRV_get_messages(program)); 545 if (spirv_msg.len) glslang_log(*arena, name, s8(": spirv info: "), spirv_msg); 546 } else { 547 glslang_log(*arena, name, s8(": shader linking failed\n"), 548 c_str_to_s8((c8 *)glslang_program_get_info_log(program)), 549 c_str_to_s8((c8 *)glslang_program_get_info_debug_log(program))); 550 } 551 glslang_shader_delete(shader); 552 glslang_program_delete(program); 553 } 554 555 return result; 556 } 557 558 function u32 559 vk_shader_kind_to_glslang_shader_kind(u32 kind) 560 { 561 u32 result = ctz_u64(kind); 562 return result; 563 } 564 565 function VkShaderModule 566 vk_compile_shader_module(Arena arena, u32 kind, s8 text, s8 name) 567 { 568 VkShaderModule result = {0}; 569 s8 spirv = glsl_to_spirv(&arena, vk_shader_kind_to_glslang_shader_kind(kind), text, name); 570 VkShaderModuleCreateInfo create_info = { 571 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 572 .codeSize = (uz)spirv.len, 573 .pCode = (u32 *)spirv.data, 574 }; 575 if (spirv.len > 0) vkCreateShaderModule(vulkan_context->device, &create_info, 0, &result); 576 577 return result; 578 } 579 580 function VkShaderStageFlags 581 vk_stage_flags_from_shader_kind(VulkanShaderKind kind) 582 { 583 read_only 
local_persist VkShaderStageFlags map[VulkanShaderKind_Count + 1] = { 584 [VulkanShaderKind_Vertex] = VK_SHADER_STAGE_VERTEX_BIT, 585 [VulkanShaderKind_Mesh] = VK_SHADER_STAGE_MESH_BIT_EXT, 586 [VulkanShaderKind_Fragment] = VK_SHADER_STAGE_FRAGMENT_BIT, 587 [VulkanShaderKind_Compute] = VK_SHADER_STAGE_COMPUTE_BIT, 588 [VulkanShaderKind_Count] = 0, 589 }; 590 VkShaderStageFlags result = map[Clamp((u32)kind, 0, VulkanShaderKind_Count)]; 591 return result; 592 } 593 594 function VulkanPipeline 595 vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_constants_size) 596 { 597 VulkanPipeline result = {.stage_flags = VK_SHADER_STAGE_COMPUTE_BIT}; 598 VkShaderModule module = vk_compile_shader_module(arena, VK_SHADER_STAGE_COMPUTE_BIT, text, name); 599 if (module) { 600 VkPushConstantRange push_constant_range = { 601 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 602 .offset = 0, 603 .size = push_constants_size, 604 }; 605 606 VkPipelineLayoutCreateInfo pipeline_layout_create_info = { 607 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 608 .setLayoutCount = countof(vulkan_context->descriptor_set_layouts), 609 .pSetLayouts = vulkan_context->descriptor_set_layouts, 610 .pushConstantRangeCount = push_constants_size ? 1 : 0, 611 .pPushConstantRanges = push_constants_size ? 
&push_constant_range : 0, 612 }; 613 614 vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_create_info, 0, &result.layout); 615 616 VkComputePipelineCreateInfo pipeline_create_info = { 617 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 618 .layout = result.layout, 619 .stage = { 620 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 621 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 622 .module = module, 623 .pName = "main", 624 }, 625 }; 626 627 vkCreateComputePipelines(vulkan_context->device, 0, 1, &pipeline_create_info, 0, &result.pipeline); 628 629 vk_label_object(PIPELINE, result.pipeline, name, s8("Pipeline")); 630 vk_label_object(PIPELINE_LAYOUT, result.layout, name, s8("Pipeline Layout")); 631 vk_label_object(SHADER_MODULE, module, name, s8("Module")); 632 633 vkDestroyShaderModule(vulkan_context->device, module, 0); 634 } 635 if (result.pipeline == 0) result = vulkan_context->default_compute_pipeline; 636 637 return result; 638 } 639 640 function VulkanPipeline 641 vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size) 642 { 643 assume(count == 2); 644 645 VulkanPipeline result = {0}; 646 VkShaderModule modules[2]; 647 648 modules[0] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[0].kind), 649 infos[0].text, infos[0].name); 650 modules[1] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[1].kind), 651 infos[1].text, infos[1].name); 652 if (modules[0] && modules[1]) { 653 result.stage_flags = vk_stage_flags_from_shader_kind(infos[0].kind) 654 | vk_stage_flags_from_shader_kind(infos[1].kind); 655 656 VkPushConstantRange pcr = { 657 .stageFlags = result.stage_flags, 658 .offset = 0, 659 .size = push_constants_size, 660 }; 661 662 VkPipelineLayoutCreateInfo pipeline_layout_info = { 663 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 664 .setLayoutCount = countof(vulkan_context->descriptor_set_layouts), 665 .pSetLayouts = 
vulkan_context->descriptor_set_layouts, 666 .pushConstantRangeCount = push_constants_size ? 1 : 0, 667 .pPushConstantRanges = push_constants_size ? &pcr : 0, 668 }; 669 670 vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_info, 0, &result.layout); 671 672 VkPipelineShaderStageCreateInfo shader_stage_create_infos[2] = { 673 { 674 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 675 .stage = vk_stage_flags_from_shader_kind(infos[0].kind), 676 .module = modules[0], 677 .pName = "main", 678 }, 679 { 680 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 681 .stage = vk_stage_flags_from_shader_kind(infos[1].kind), 682 .module = modules[1], 683 .pName = "main", 684 }, 685 }; 686 687 VkPipelineVertexInputStateCreateInfo vertex_input_info = { 688 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 689 }; 690 691 VkPipelineInputAssemblyStateCreateInfo input_assembly_info = { 692 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, 693 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 694 }; 695 696 VkPipelineViewportStateCreateInfo viewport_info = { 697 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, 698 .viewportCount = 1, 699 .scissorCount = 1, 700 }; 701 702 VkPipelineRasterizationStateCreateInfo rasterization_info = { 703 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 704 .polygonMode = VK_POLYGON_MODE_FILL, 705 .lineWidth = 1.0f, 706 .cullMode = VK_CULL_MODE_BACK_BIT, 707 .frontFace = VK_FRONT_FACE_CLOCKWISE, 708 }; 709 710 VkPipelineMultisampleStateCreateInfo multisampling_info = { 711 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 712 .rasterizationSamples = vulkan_context->gpu_info.max_msaa_samples, 713 }; 714 715 VkPipelineDepthStencilStateCreateInfo depth_test_create_info = { 716 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 717 .depthTestEnable = 1, 718 .depthWriteEnable = 1, 719 .depthCompareOp = 
VK_COMPARE_OP_LESS, 720 .depthBoundsTestEnable = 1, 721 .stencilTestEnable = 0, 722 .front = {0}, 723 .back = {0}, 724 .minDepthBounds = 0.0f, 725 .maxDepthBounds = 1.0f, 726 }; 727 728 u32 colour_mask = VK_COLOR_COMPONENT_R_BIT|VK_COLOR_COMPONENT_G_BIT|VK_COLOR_COMPONENT_B_BIT|VK_COLOR_COMPONENT_A_BIT; 729 VkPipelineColorBlendAttachmentState blend_state = { 730 .colorWriteMask = colour_mask, 731 .blendEnable = 1, 732 .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA, 733 .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, 734 .colorBlendOp = VK_BLEND_OP_ADD, 735 .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE, 736 .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, 737 .alphaBlendOp = VK_BLEND_OP_ADD, 738 }; 739 740 VkPipelineColorBlendStateCreateInfo colour_blend_state_create = { 741 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 742 .logicOpEnable = 0, 743 .logicOp = VK_LOGIC_OP_COPY, 744 .attachmentCount = 1, 745 .pAttachments = &blend_state, 746 }; 747 748 VkDynamicState dynamic_states[] = { 749 VK_DYNAMIC_STATE_VIEWPORT, 750 VK_DYNAMIC_STATE_SCISSOR, 751 }; 752 753 VkPipelineDynamicStateCreateInfo dynamic_state_info = { 754 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, 755 .dynamicStateCount = countof(dynamic_states), 756 .pDynamicStates = dynamic_states, 757 }; 758 759 //VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_SRGB; 760 VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_UNORM; 761 VkPipelineRenderingCreateInfo rendering_create_info = { 762 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, 763 .colorAttachmentCount = 1, 764 .pColorAttachmentFormats = &colour_attachment_format, 765 .depthAttachmentFormat = vulkan_context->depth_stencil_format, 766 .stencilAttachmentFormat = vulkan_context->depth_stencil_format, 767 }; 768 769 VkGraphicsPipelineCreateInfo pci = { 770 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 771 .pNext = &rendering_create_info, 772 .stageCount = 
countof(shader_stage_create_infos), 773 .pStages = shader_stage_create_infos, 774 .pVertexInputState = &vertex_input_info, 775 .pInputAssemblyState = &input_assembly_info, 776 .pViewportState = &viewport_info, 777 .pRasterizationState = &rasterization_info, 778 .pMultisampleState = &multisampling_info, 779 .pDepthStencilState = &depth_test_create_info, 780 .pColorBlendState = &colour_blend_state_create, 781 .pDynamicState = &dynamic_state_info, 782 .layout = result.layout, 783 }; 784 785 vkCreateGraphicsPipelines(vulkan_context->device, 0, 1, &pci,0, &result.pipeline); 786 787 s8 extras[] = { 788 [VulkanShaderKind_Vertex] = s8_comp("Vertex Module"), 789 [VulkanShaderKind_Mesh] = s8_comp("Mesh Module"), 790 [VulkanShaderKind_Fragment] = s8_comp("Fragment Module"), 791 }; 792 assert(infos[0].kind < countof(extras)); 793 assert(infos[1].kind < countof(extras)); 794 795 vk_label_object(PIPELINE, result.pipeline, infos[0].name, s8("Pipeline")); 796 vk_label_object(PIPELINE_LAYOUT, result.layout, infos[0].name, s8("Pipeline Layout")); 797 //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[0], infos[0].name, extras[infos[0].kind]); 798 //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[1], infos[1].name, extras[infos[1].kind]); 799 } 800 801 if (modules[0]) vkDestroyShaderModule(vulkan_context->device, modules[0], 0); 802 if (modules[1]) vkDestroyShaderModule(vulkan_context->device, modules[1], 0); 803 804 if (result.pipeline == 0) result = vulkan_context->default_graphics_pipeline; 805 806 return result; 807 } 808 809 function VulkanSemaphore 810 vk_make_semaphore(OSHandle *export) 811 { 812 VulkanContext *vk = vulkan_context; 813 814 VkSemaphoreCreateInfo sci = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; 815 VkExportSemaphoreCreateInfo esci = { 816 .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, 817 .handleTypes = OS_WINDOWS ? 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT 818 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, 819 }; 820 VkSemaphoreTypeCreateInfo stc = { 821 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, 822 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, 823 }; 824 825 if (export) sci.pNext = &esci; 826 else sci.pNext = &stc; 827 828 VulkanSemaphore result = {0}; 829 830 vkCreateSemaphore(vk->device, &sci, 0, &result.semaphore); 831 832 if (export) { 833 if (OS_WINDOWS) { 834 VkSemaphoreGetWin32HandleInfoKHR ghi = { 835 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR, 836 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT, 837 .semaphore = result.semaphore, 838 }; 839 void *handle; 840 vkGetSemaphoreWin32HandleKHR(vk->device, &ghi, &handle); 841 export->value[0] = (u64)handle; 842 } else { 843 VkSemaphoreGetFdInfoKHR ghi = { 844 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, 845 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, 846 .semaphore = result.semaphore, 847 }; 848 i32 handle; 849 vkGetSemaphoreFdKHR(vk->device, &ghi, &handle); 850 export->value[0] = (u64)handle; 851 } 852 } 853 854 return result; 855 } 856 857 function void 858 vk_release_memory(VkDeviceMemory memory, u64 size) 859 { 860 VulkanContext *vk = vulkan_context; 861 vkFreeMemory(vk->device, memory, 0); 862 atomic_add_u64(&vk->gpu_info.gpu_heap_used, -size); 863 } 864 865 function b32 866 vk_allocate_memory(VkDeviceMemory *memory, u64 size, VulkanMemoryKind kind, VkMemoryAllocateFlags flags, 867 VkMemoryDedicatedAllocateInfo *dedicated_allocate_info, OSHandle *export) 868 { 869 VulkanContext *vk = vulkan_context; 870 871 VkExportMemoryAllocateInfo export_info = { 872 .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, 873 .handleTypes = OS_WINDOWS ? 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT 874 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 875 }; 876 877 VkMemoryAllocateFlagsInfo memory_allocate_flags_info = { 878 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, 879 .flags = flags, 880 .pNext = dedicated_allocate_info, 881 }; 882 883 if (export) { 884 export_info.pNext = dedicated_allocate_info; 885 memory_allocate_flags_info.pNext = &export_info; 886 } 887 888 VkMemoryAllocateInfo memory_allocate_info = { 889 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, 890 .allocationSize = size, 891 .memoryTypeIndex = vk->memory_info.memory_type_indices[kind], 892 .pNext = &memory_allocate_flags_info, 893 }; 894 895 b32 result = vkAllocateMemory(vk->device, &memory_allocate_info, 0, memory) == VK_SUCCESS; 896 if (result) { 897 atomic_add_u64(&vk->gpu_info.gpu_heap_used, memory_allocate_info.allocationSize); 898 899 if (export) { 900 if (OS_WINDOWS) { 901 VkMemoryGetWin32HandleInfoKHR handle_info = { 902 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, 903 .memory = *memory, 904 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, 905 }; 906 void *handle; 907 vkGetMemoryWin32HandleKHR(vk->device, &handle_info, &handle); 908 export->value[0] = (u64)handle; 909 } else { 910 VkMemoryGetFdInfoKHR fd_info = { 911 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, 912 .memory = *memory, 913 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 914 }; 915 i32 fd; 916 vkGetMemoryFdKHR(vk->device, &fd_info, &fd); 917 export->value[0] = (u64)fd; 918 } 919 } 920 } 921 return result; 922 } 923 924 function u32 925 vk_index_size(VkIndexType type) 926 { 927 u32 result = 0; 928 switch (type) { 929 case VK_INDEX_TYPE_UINT16:{ result = 2; }break; 930 case VK_INDEX_TYPE_UINT32:{ result = 4; }break; 931 InvalidDefaultCase; 932 } 933 return result; 934 } 935 936 typedef struct { 937 GPUBuffer *gpu_buffer; 938 u64 size; 939 VulkanUsageFlags flags; 940 u32 queue_family_count; 941 u32 
queue_family_indices[VulkanTimeline_Count]; 942 VkIndexType index_type; 943 s8 label; 944 } VulkanBufferAllocateInfo; 945 946 function b32 947 vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai) 948 { 949 VulkanContext *vk = vulkan_context; 950 951 // TODO(rnp): this probably should be handled, its usually 4GB. likely 952 // need to chain multiple allocations and handle it in shader code 953 u64 clamp_size = vk->memory_info.max_allocation_size & ~(vk->memory_info.non_coherent_atom_size - 1); 954 955 // NOTE(rnp): renderdoc can't handle buffers that are too close to the allocation size limit 956 if (renderdoc_attached()) 957 clamp_size -= MB(8); 958 959 u64 size = Min(ai->size, clamp_size); 960 961 VkBufferCreateInfo buffer_create_info = { 962 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 963 .usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT|VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 964 .size = size, 965 .sharingMode = ai->queue_family_count > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, 966 .queueFamilyIndexCount = ai->queue_family_count, 967 .pQueueFamilyIndices = ai->queue_family_indices, 968 }; 969 970 if (ai->flags & VulkanUsageFlag_TransferSource) 971 buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; 972 973 if (ai->flags & VulkanUsageFlag_TransferDestination) 974 buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; 975 976 if (ai->index_type != VK_INDEX_TYPE_NONE_KHR) 977 buffer_create_info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; 978 979 vkCreateBuffer(vk->device, &buffer_create_info, 0, &vb->buffer); 980 vk_label_object(BUFFER, vb->buffer, ai->label, s8("Buffer")); 981 982 VkMemoryRequirements memory_requirements; 983 vkGetBufferMemoryRequirements(vk->device, vb->buffer, &memory_requirements); 984 985 assert((u64)size <= memory_requirements.size); 986 size = memory_requirements.size; 987 988 VkMemoryDedicatedAllocateInfo dedicated_allocate_info = { 989 .sType = 
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, 990 .buffer = vb->buffer, 991 }; 992 993 /* NOTE(rnp): to create a CPU writable buffer: 994 * 1. try to allocate and map the entire buffer 995 * - this may fail if the buffer is bigger than the BAR size 996 * (unknowable from vulkan), or the memory space has become 997 * too fragmented (unlikely) 998 * 2. if allocation or mapping fails we must chain a host buffer 999 * for staging. If this happens in practice we should add 1000 * the ability to import an existing external allocation 1001 */ 1002 b32 host_read_write = (ai->flags & VulkanUsageFlag_HostReadWrite) != 0; 1003 vb->memory_kind = host_read_write ? VulkanMemoryKind_BAR : VulkanMemoryKind_Device; 1004 1005 b32 result = 0; 1006 // TODO(rnp): this may fail if the allocation is too big for the BAR size 1007 // it needs to handled properly 1008 if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, 0)) { 1009 result = 1; 1010 ai->gpu_buffer->size = size; 1011 vb->memory_size = size; 1012 1013 vb->index_type = ai->index_type; 1014 1015 vk_label_object(DEVICE_MEMORY, vb->memory, ai->label, s8("Memory")); 1016 1017 if (host_read_write) 1018 vkMapMemory(vk->device, vb->memory, 0, size, 0, &vb->host_pointer); 1019 1020 vkBindBufferMemory(vk->device, vb->buffer, vb->memory, 0); 1021 VkBufferDeviceAddressInfo buffer_device_address_info = { 1022 .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, 1023 .buffer = vb->buffer, 1024 }; 1025 ai->gpu_buffer->gpu_pointer = vkGetBufferDeviceAddress(vk->device, &buffer_device_address_info); 1026 } 1027 return result; 1028 } 1029 1030 function void 1031 vk_load_instance(Arena arena, Stream *err) 1032 { 1033 #define X(name, ...) 
name = (name##_fn *)vkGetInstanceProcAddr(0, #name); 1034 VkBaseProcedureList 1035 #undef X 1036 1037 u32 enabled_validation_layers_count = 0; 1038 const char *enabled_validation_layers[countof(vk_validation_layers)]; 1039 1040 u32 enabled_instance_extensions_count = 0; 1041 const char *enabled_instance_extensions[countof(vk_required_instance_extensions) + countof(vk_instance_debug_extensions)]; 1042 1043 static_assert(countof(vk_required_instance_extensions) == 0, ""); 1044 //for EachElement(vk_required_instance_extensions, it) 1045 // enabled_instance_extensions[enabled_instance_extensions_count++] = vk_required_instance_extensions[it]; 1046 1047 #if BEAMFORMER_DEBUG 1048 { 1049 u32 layer_count = 0; 1050 vkEnumerateInstanceLayerProperties(&layer_count, 0); 1051 1052 VkLayerProperties *layers = push_array(&arena, VkLayerProperties, layer_count); 1053 str8 *layer_str8s = push_array(&arena, str8, layer_count); 1054 vkEnumerateInstanceLayerProperties(&layer_count, layers); 1055 1056 for (u32 i = 0; i < layer_count; i++) 1057 layer_str8s[i] = str8_from_c_str(layers[i].layerName); 1058 1059 for EachElement(vk_validation_layers, it) { 1060 for(u32 i = 0; i < layer_count; i++) { 1061 if (str8_equal(vk_validation_layers[it], layer_str8s[i])) { 1062 u32 index = enabled_validation_layers_count++; 1063 enabled_validation_layers[index] = (char *)vk_validation_layers[it].data; 1064 vulkan_config.layers.enabled.E[it] = 1; 1065 vulkan_config.layers.version.E[it] = layers[i].specVersion; 1066 break; 1067 } 1068 } 1069 } 1070 1071 if (countof(vk_validation_layers) != enabled_validation_layers_count) { 1072 i32 missing_count = countof(vk_validation_layers) - enabled_validation_layers_count; 1073 stream_append_s8s(err, vulkan_info("missing validation layer"), 1074 missing_count > 1 ? 
s8("s:") : s8(":"), s8("\n")); 1075 1076 for EachElement(vk_validation_layers, it) 1077 if (vulkan_config.layers.enabled.E[it] == 0) 1078 stream_append_s8s(err, s8(" "), s8_from_str8(vk_validation_layers[it]), s8("\n")); 1079 } 1080 1081 u32 instance_extension_count = 0; 1082 vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, 0); 1083 1084 VkExtensionProperties *instance_extensions = push_array(&arena, VkExtensionProperties, instance_extension_count); 1085 s8 *instance_ext_s8s = push_array(&arena, s8, instance_extension_count); 1086 vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, instance_extensions); 1087 for EachIndex(instance_extension_count, it) 1088 instance_ext_s8s[it] = c_str_to_s8(instance_extensions[it].extensionName); 1089 1090 for EachElement(vk_instance_debug_extensions, it) { 1091 for EachIndex(instance_extension_count, i) { 1092 if (s8_equal(vk_instance_debug_extensions[it], instance_ext_s8s[i])) { 1093 u32 index = enabled_instance_extensions_count++; 1094 enabled_instance_extensions[index] = (char *)vk_instance_debug_extensions[it].data; 1095 vulkan_config.instance.E[it] = 1; 1096 break; 1097 } 1098 } 1099 } 1100 } 1101 #endif 1102 1103 VkApplicationInfo app_info = { 1104 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, 1105 .pApplicationName = BEAMFORMER_NAME_STRING, 1106 .applicationVersion = 0, 1107 .pEngineName = "No Engine", 1108 .engineVersion = 0, 1109 .apiVersion = VK_MAKE_API_VERSION(1, 3, 0, 0), 1110 }; 1111 1112 VkInstanceCreateInfo instance_create_info = { 1113 .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, 1114 .pApplicationInfo = &app_info, 1115 .ppEnabledExtensionNames = enabled_instance_extensions, 1116 .enabledExtensionCount = enabled_instance_extensions_count, 1117 .ppEnabledLayerNames = enabled_validation_layers, 1118 .enabledLayerCount = enabled_validation_layers_count, 1119 }; 1120 1121 #if 0 && BEAMFORMER_DEBUG 1122 VkValidationFeatureEnableEXT validation_feature_enables[] = { 1123 
VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, 1124 VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, 1125 VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT, 1126 VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, 1127 }; 1128 1129 VkValidationFeaturesEXT validation_features = { 1130 .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, 1131 .enabledValidationFeatureCount = countof(validation_feature_enables), 1132 .pEnabledValidationFeatures = validation_feature_enables, 1133 }; 1134 1135 instance_create_info.pNext = &validation_features; 1136 #endif 1137 1138 vkCreateInstance(&instance_create_info, 0, &vulkan_context->handle); 1139 1140 #define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(vulkan_context->handle, #name); 1141 VkInstanceProcedureList 1142 #undef X 1143 } 1144 1145 function void 1146 vk_load_physical_device(Arena arena, Stream *err) 1147 { 1148 VulkanContext *vk = vulkan_context; 1149 1150 u32 device_count; 1151 vkEnumeratePhysicalDevices(vk->handle, &device_count, 0); 1152 1153 VkPhysicalDevice *devices = push_array(&arena, typeof(*devices), device_count); 1154 vkEnumeratePhysicalDevices(vk->handle, &device_count, devices); 1155 1156 i32 best_index = -1, best_score = -1; 1157 for (u32 i = 0; i < device_count; i++) { 1158 Arena scratch = arena; 1159 VkPhysicalDeviceProperties2 *dp = push_struct(&scratch, typeof(*dp)); 1160 dp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; 1161 vkGetPhysicalDeviceProperties2(devices[i], dp); 1162 1163 i32 score = 0; 1164 if (dp->properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) 1165 score++; 1166 1167 if (score > best_score) { 1168 best_score = score; 1169 best_index = (i32)i; 1170 } 1171 } 1172 1173 vk->physical_device = best_index >= 0 ? 
devices[best_index] : 0; 1174 if (!vk->physical_device) 1175 fatal(vulkan_info("failed to find a suitable GPU\n")); 1176 1177 VkPhysicalDeviceProperties2 dp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; 1178 VkPhysicalDeviceVulkan11Properties v11p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES}; 1179 dp.pNext = &v11p; 1180 1181 vkGetPhysicalDeviceProperties2(vk->physical_device, &dp); 1182 1183 stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp.properties.deviceName), s8("\n")); 1184 stream_append_s8(err, vulkan_info("Vulkan Version: ")); 1185 { 1186 u32 dv = dp.properties.apiVersion; 1187 stream_appendf(err, "%u.%u.%u\n", VK_API_VERSION_MAJOR(dv), VK_API_VERSION_MINOR(dv), VK_API_VERSION_PATCH(dv)); 1188 } 1189 1190 { 1191 Arena scratch = arena; 1192 u32 extension_count = 0; 1193 vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, 0); 1194 VkExtensionProperties *extensions = push_array(&scratch, VkExtensionProperties, extension_count); 1195 vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, extensions); 1196 1197 s8 *ext_str8s = push_array(&scratch, s8, extension_count); 1198 for (u32 index = 0; index < extension_count; index++) 1199 ext_str8s[index] = c_str_to_s8(extensions[index].extensionName); 1200 1201 b8 *supported = push_array(&scratch, b8, countof(vk_required_device_extensions)); 1202 for EachIndex(extension_count, index) 1203 for EachElement(vk_required_device_extensions, it) 1204 supported[it] |= s8_equal(vk_required_device_extensions[it], ext_str8s[index]); 1205 1206 u32 supported_count = 0; 1207 for EachElement(vk_required_device_extensions, it) 1208 supported_count += supported[it]; 1209 1210 u32 missing_count = countof(vk_required_device_extensions) - supported_count; 1211 if (missing_count) { 1212 stream_append_s8s(err, vulkan_info("fatal error: missing required device extension"), 1213 missing_count > 1 ? 
s8("s") : s8(""), s8(":\n")); 1214 for EachElement(vk_required_device_extensions, it) { 1215 if (!supported[it]) { 1216 s8 name = vk_required_device_extensions[it]; 1217 stream_append_s8s(err, vulkan_info(" "), name, s8("\n")); 1218 } 1219 } 1220 fatal(stream_to_s8(err)); 1221 } 1222 1223 for EachIndex(extension_count, index) 1224 for EachElement(vk_optional_device_extensions, it) 1225 vulkan_config.optional.E[it] |= s8_equal(vk_optional_device_extensions[it], ext_str8s[index]); 1226 1227 #if BEAMFORMER_DEBUG 1228 for EachIndex(extension_count, index) 1229 for EachElement(vk_debug_extensions, it) 1230 vulkan_config.debug.E[it] |= s8_equal(vk_debug_extensions[it], ext_str8s[index]); 1231 #endif 1232 } 1233 1234 { 1235 VkPhysicalDeviceFeatures2 df = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2}; 1236 VkPhysicalDeviceVulkan11Features v11f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES}; 1237 VkPhysicalDeviceVulkan12Features v12f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES}; 1238 VkPhysicalDeviceVulkan13Features v13f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES}; 1239 df.pNext = &v11f; 1240 v11f.pNext = &v12f; 1241 v12f.pNext = &v13f; 1242 vkGetPhysicalDeviceFeatures2(vk->physical_device, &df); 1243 1244 { 1245 b32 all_supported = 1; 1246 #define X(name, ...) all_supported &= df.features.name; 1247 VK_REQUIRED_PHYSICAL_FEATURES 1248 #undef X 1249 1250 if (!all_supported) { 1251 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1252 #define X(name, ...) if (!df.features.name) stream_append_s8(err, s8(" " #name "\n")); 1253 VK_REQUIRED_PHYSICAL_FEATURES 1254 #undef X 1255 fatal(stream_to_s8(err)); 1256 } 1257 } 1258 1259 { 1260 b32 all_supported = 1; 1261 #define X(name, ...) 
all_supported &= v11f.name; 1262 VK_REQUIRED_PHYSICAL_11_FEATURES 1263 #undef X 1264 1265 if (!all_supported) { 1266 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1267 #define X(name, ...) if (!v11f.name) stream_append_s8(err, s8(" " #name "\n")); 1268 VK_REQUIRED_PHYSICAL_11_FEATURES 1269 #undef X 1270 fatal(stream_to_s8(err)); 1271 } 1272 } 1273 1274 { 1275 b32 all_supported = 1; 1276 #define X(name, ...) all_supported &= v12f.name; 1277 VK_REQUIRED_PHYSICAL_12_FEATURES 1278 #undef X 1279 1280 if (!all_supported) { 1281 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1282 #define X(name, ...) if (!v12f.name) stream_append_s8(err, s8(" " #name "\n")); 1283 VK_REQUIRED_PHYSICAL_12_FEATURES 1284 #undef X 1285 fatal(stream_to_s8(err)); 1286 } 1287 } 1288 1289 { 1290 b32 all_supported = 1; 1291 #define X(name, ...) all_supported &= v13f.name; 1292 VK_REQUIRED_PHYSICAL_13_FEATURES 1293 #undef X 1294 1295 if (!all_supported) { 1296 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1297 #define X(name, ...) 
if (!v13f.name) stream_append_s8(err, s8(" " #name "\n")); 1298 VK_REQUIRED_PHYSICAL_13_FEATURES 1299 #undef X 1300 fatal(stream_to_s8(err)); 1301 } 1302 } 1303 1304 if (vulkan_config.optional.cooperative_matrix) { 1305 Arena scratch = arena; 1306 u32 property_count = 0; 1307 vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, 0); 1308 1309 VkCooperativeMatrixPropertiesKHR *mat = push_array(&scratch, VkCooperativeMatrixPropertiesKHR, property_count); 1310 1311 // NOTE(rnp): validation layer stupidity 1312 for EachIndex(property_count, it) 1313 mat[it].sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR; 1314 1315 vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, mat); 1316 b32 supported = 0; 1317 // TODO(rnp): for now the requirements are hardcoded, it is possible to support a couple 1318 // variations if needed. 1319 for EachIndex(property_count, it) { 1320 b32 match = 1; 1321 supported &= mat[it].scope == VK_SCOPE_SUBGROUP_KHR; 1322 1323 supported &= mat[it].MSize == 16; 1324 supported &= mat[it].NSize == 16; 1325 supported &= mat[it].KSize == 16; 1326 1327 supported &= mat[it].AType == VK_COMPONENT_TYPE_FLOAT16_KHR; 1328 supported &= mat[it].BType == VK_COMPONENT_TYPE_FLOAT16_KHR; 1329 supported &= mat[it].CType == VK_COMPONENT_TYPE_FLOAT32_KHR; 1330 supported &= mat[it].ResultType == VK_COMPONENT_TYPE_FLOAT32_KHR; 1331 1332 supported |= match; 1333 } 1334 vk->gpu_info.cooperative_matrix = supported; 1335 } 1336 } 1337 1338 VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2}; 1339 vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, &mp); 1340 1341 VkPhysicalDeviceMemoryProperties *bmp = &mp.memoryProperties; 1342 1343 // NOTE(rnp): vulkan spec says that highest performance memory types must 1344 // come first. just take the first one found. 
1345 1346 for (u32 i = 0; i < bmp->memoryHeapCount; i++) { 1347 if (bmp->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { 1348 vk->memory_info.gpu_heap_index = i; 1349 break; 1350 } 1351 } 1352 1353 for (u32 i = 0; i < bmp->memoryTypeCount; i++) { 1354 if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { 1355 assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index); 1356 vk->memory_info.memory_type_indices[VulkanMemoryKind_Device] = i; 1357 break; 1358 } 1359 } 1360 1361 // TODO(rnp): it is possible that this isn't available. for devices like that we would need 1362 // to copy into a staging buffer then DMA. For now that is unsupported. 1363 u32 bar_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT|VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; 1364 i32 bar_index = -1; 1365 for (u32 i = 0; i < bmp->memoryTypeCount; i++) { 1366 if ((bmp->memoryTypes[i].propertyFlags & bar_flags) == bar_flags) { 1367 assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index); 1368 bar_index = (i32)i; 1369 break; 1370 } 1371 } 1372 1373 // TODO(rnp): this shouldn't be fatal 1374 if (bar_index == -1) { 1375 stream_append_s8(err, vulkan_info("fatal error: GPU does not support host bar memory\n")); 1376 fatal(stream_to_s8(err)); 1377 } 1378 1379 vk->memory_info.memory_type_indices[VulkanMemoryKind_BAR] = bar_index; 1380 1381 vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = -1; 1382 for (u32 i = 0; i < bmp->memoryTypeCount; i++) { 1383 if ((bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == 0) { 1384 if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { 1385 vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = (i8)i; 1386 break; 1387 } 1388 } 1389 } 1390 1391 if (vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] == -1) { 1392 stream_append_s8(err, vulkan_info("fatal error: vulkan driver does not provide host visible memory\n")); 1393 fatal(stream_to_s8(err)); 1394 
} 1395 1396 for EachElement(vk->memory_info.memory_type_indices, it) { 1397 u32 ti = vk->memory_info.memory_type_indices[it]; 1398 u32 flags = bmp->memoryTypes[ti].propertyFlags; 1399 vk->memory_info.memory_host_coherent[it] = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; 1400 } 1401 1402 vulkan_config.driver_api_version = dp.properties.apiVersion; 1403 vk->memory_info.max_allocation_size = v11p.maxMemoryAllocationSize; 1404 vk->memory_info.non_coherent_atom_size = dp.properties.limits.nonCoherentAtomSize; 1405 vk->gpu_info.vendor = dp.properties.vendorID; 1406 vk->gpu_info.gpu_heap_size = bmp->memoryHeaps[vk->memory_info.gpu_heap_index].size; 1407 vk->gpu_info.timestamp_period_ns = dp.properties.limits.timestampPeriod; 1408 vk->gpu_info.max_image_dimension_2D = dp.properties.limits.maxImageDimension2D; 1409 vk->gpu_info.max_image_dimension_3D = dp.properties.limits.maxImageDimension3D; 1410 vk->gpu_info.max_msaa_samples = round_down_power_of_two(dp.properties.limits.framebufferColorSampleCounts); 1411 vk->gpu_info.subgroup_size = v11p.subgroupSize; 1412 vk->gpu_info.max_compute_shared_memory_size = dp.properties.limits.maxComputeSharedMemorySize; 1413 1414 // IMPORTANT(rnp): memory must only be pushed at the end of the function 1415 vk->gpu_info.name = push_s8(&vk->arena, c_str_to_s8(dp.properties.deviceName)); 1416 1417 #if BEAMFORMER_DEBUG 1418 { 1419 b32 mismatch = 0; 1420 for EachElement(vk_validation_layers, it) { 1421 u32 lv = vulkan_config.layers.version.E[it]; 1422 u32 dv = vulkan_config.driver_api_version; 1423 if (lv < dv) { 1424 mismatch = 1; 1425 stream_append_s8s(err, vulkan_info("warning: validaton layer \""), 1426 s8_from_str8(vk_validation_layers[it]), s8("\" version: ")); 1427 stream_appendf(err, "%u.%u.%u", VK_API_VERSION_MAJOR(lv), VK_API_VERSION_MINOR(lv), VK_API_VERSION_PATCH(lv)); 1428 stream_append_s8(err, s8(" lower than driver API version: ")); 1429 stream_appendf(err, "%u.%u.%u\n", VK_API_VERSION_MAJOR(dv), 
VK_API_VERSION_MINOR(dv), VK_API_VERSION_PATCH(dv)); 1430 } 1431 } 1432 1433 if (mismatch) 1434 stream_append_s8(err, vulkan_info("DO NOT report any bugs without updating your validation layers!\n")); 1435 } 1436 #endif 1437 } 1438 1439 function void 1440 vk_load_queues(Arena *memory, Stream *err) 1441 { 1442 /////////////////////////////////////////////////////// 1443 // NOTE(rnp): try to allocate an appropriate queue for 1444 // each of the following tasks: 1445 // * UI Rendering (Graphics) 1446 // * Beamforming (Compute) 1447 // * Upload (Transfer) 1448 // Then create a logical device ready for use 1449 1450 VulkanContext *vk = vulkan_context; 1451 1452 u32 queue_family_count; 1453 vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, 0); 1454 1455 TempArena arena_save = begin_temp_arena(memory); 1456 VkQueueFamilyProperties *queues = push_array(memory, typeof(*queues), queue_family_count); 1457 vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, queues); 1458 1459 i32 queue_indices[VulkanQueueKind_Count]; 1460 for EachElement(queue_indices, it) queue_indices[it] = -1; 1461 1462 /////////////////////////////////////////////////////////////// 1463 // NOTE(rnp): start by assigning queue families for each queue 1464 1465 /* NOTE(rnp): try for exclusive transfer queue */ 1466 #if !ForceSingleQueue 1467 { 1468 u32 mask = VK_QUEUE_GRAPHICS_BIT|VK_QUEUE_COMPUTE_BIT|VK_QUEUE_TRANSFER_BIT; 1469 u32 max_timestamp_bits = 0; 1470 for (u32 index = 0; index < queue_family_count; index++) { 1471 if ((queues[index].queueFlags & mask) == VK_QUEUE_TRANSFER_BIT) { 1472 if (queues[index].timestampValidBits > max_timestamp_bits) { 1473 max_timestamp_bits = queues[index].timestampValidBits; 1474 queue_indices[VulkanQueueKind_Transfer] = (i32)index; 1475 } 1476 } 1477 } 1478 } 1479 1480 /* NOTE(rnp): try for compute separate from graphics */ 1481 for (u32 index = 0; index < queue_family_count; index++) { 1482 if 
((queues[index].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0 && 1483 (queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0) 1484 { 1485 queue_indices[VulkanQueueKind_Compute] = (i32)index; 1486 break; 1487 } 1488 } 1489 #endif /* !ForceSingleQueue */ 1490 1491 /* NOTE(rnp): find graphics family and verify it is exclusive */ 1492 b32 multi_graphics = 0; 1493 for (u32 index = 0; index < queue_family_count; index++) { 1494 if ((queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) { 1495 // TODO(rnp): check for presentation support 1496 multi_graphics = queue_indices[VulkanQueueKind_Graphics] != -1; 1497 queue_indices[VulkanQueueKind_Graphics] = (i32)index; 1498 } 1499 } 1500 1501 if (multi_graphics) 1502 stream_append_s8(err, vulkan_info("warning: multiple queue families reported graphics support\n")); 1503 1504 if (queue_indices[VulkanQueueKind_Graphics] == -1) { 1505 stream_append_s8(err, vulkan_info("fatal error: GPU does not support graphics presentation\n")); 1506 fatal(stream_to_s8(err)); 1507 } 1508 1509 if (queue_indices[VulkanQueueKind_Compute] == -1) 1510 if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0) 1511 queue_indices[VulkanQueueKind_Compute] = queue_indices[VulkanQueueKind_Graphics]; 1512 1513 if (queue_indices[VulkanQueueKind_Compute] == -1) { 1514 stream_append_s8(err, vulkan_info("fatal error: GPU does not support compute\n")); 1515 fatal(stream_to_s8(err)); 1516 } 1517 1518 if (queue_indices[VulkanQueueKind_Transfer] == -1) { 1519 if ((queues[queue_indices[VulkanQueueKind_Compute]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0) 1520 queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Compute]; 1521 else if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0) 1522 queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Graphics]; 1523 } 1524 1525 if (queue_indices[VulkanQueueKind_Transfer] == -1) { 1526 stream_append_s8(err, 
vulkan_info("fatal error: GPU does not support data transfer\n")); 1527 fatal(stream_to_s8(err)); 1528 } 1529 1530 ///////////////////////////////////////////////////////////////// 1531 // NOTE(rnp): if queues share families try to allocate subqueues 1532 1533 u32 assigned_subindices[VulkanQueueKind_Count] = {0}; 1534 i32 queue_subindices[VulkanQueueKind_Count] = {0}; 1535 1536 assigned_subindices[VulkanQueueKind_Graphics] += 1; 1537 1538 if (queue_indices[VulkanQueueKind_Compute] == queue_indices[VulkanQueueKind_Graphics]) { 1539 if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount) 1540 queue_subindices[VulkanQueueKind_Compute] = assigned_subindices[VulkanQueueKind_Graphics]++; 1541 } else { 1542 assigned_subindices[VulkanQueueKind_Compute] += 1; 1543 } 1544 1545 if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Graphics]) { 1546 if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount) 1547 queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Graphics]++; 1548 } else if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Compute]) { 1549 if (assigned_subindices[VulkanQueueKind_Compute] < queues[queue_indices[VulkanQueueKind_Compute]].queueCount) 1550 queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Compute]++; 1551 } else { 1552 assigned_subindices[VulkanQueueKind_Transfer] += 1; 1553 } 1554 1555 for EachElement(assigned_subindices, it) 1556 vk->unique_queues += assigned_subindices[it]; 1557 1558 end_temp_arena(arena_save); 1559 1560 ///////////////////////////////////////////// 1561 // NOTE(rnp): fill in info and create device 1562 for EachElement(vk->queues, it) { 1563 u32 index = queue_subindices[it]; 1564 for (i32 i = 0; i < queue_indices[it]; i++) 1565 index += assigned_subindices[i]; 1566 vk->queue_indices[it] = index; 1567 } 1568 1569 for 
EachElement(vk->queues, it) { 1570 if (vk->queues[vk->queue_indices[it]] == 0) { 1571 vk->queues[vk->queue_indices[it]] = push_struct(memory, VulkanQueue); 1572 vk->queues[vk->queue_indices[it]]->queue_family = queue_indices[it]; 1573 vk->queues[vk->queue_indices[it]]->queue_index = queue_subindices[it]; 1574 } 1575 vk->queues[it] = vk->queues[vk->queue_indices[it]]; 1576 } 1577 1578 for EachElement(vk->command_pools, it) 1579 vk->command_pools[it] = push_struct(memory, VulkanCommandPool); 1580 1581 VkDeviceQueueCreateInfo queue_create_infos[VulkanQueueKind_Count]; 1582 1583 f32 queue_priorities[VulkanQueueKind_Count][VulkanQueueKind_Count]; 1584 for (u32 i = 0; i < VulkanQueueKind_Count; i++) 1585 for (u32 j = 0; j < VulkanQueueKind_Count; j++) 1586 queue_priorities[i][j] = 1.0f; 1587 queue_priorities[queue_indices[VulkanQueueKind_Compute]][queue_subindices[VulkanQueueKind_Compute]] = 0.5f; 1588 1589 u32 queue_create_index = 0; 1590 b32 queue_info_filled[VulkanQueueKind_Count] = {0}; 1591 for (u32 q = 0; q < vk->unique_queues; q++) { 1592 u32 base_q = queue_indices[q]; 1593 if (!queue_info_filled[base_q]) { 1594 queue_create_infos[queue_create_index++] = (VkDeviceQueueCreateInfo){ 1595 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, 1596 .queueFamilyIndex = base_q, 1597 .queueCount = assigned_subindices[q], 1598 .pQueuePriorities = queue_priorities[q], 1599 }; 1600 } 1601 queue_info_filled[base_q] = 1; 1602 } 1603 1604 u32 enabled_count = 0; 1605 const char *enabled_extensions[MAX_ENABLED_EXTENSIONS]; 1606 1607 for EachElement(vk_required_device_extensions, it) 1608 enabled_extensions[enabled_count++] = (char *)vk_required_device_extensions[it].data; 1609 1610 for EachElement(vk_optional_device_extensions, it) 1611 if (vulkan_config.optional.E[it]) 1612 enabled_extensions[enabled_count++] = (char *)vk_optional_device_extensions[it].data; 1613 1614 for EachElement(vk_debug_extensions, it) 1615 if (vulkan_config.debug.E[it]) 1616 
enabled_extensions[enabled_count++] = (char *)vk_debug_extensions[it].data; 1617 1618 VkDeviceCreateInfo device_create_info = { 1619 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, 1620 .pQueueCreateInfos = queue_create_infos, 1621 .queueCreateInfoCount = queue_create_index, 1622 .ppEnabledExtensionNames = enabled_extensions, 1623 .enabledExtensionCount = enabled_count, 1624 }; 1625 1626 VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR pdsre = { 1627 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR, 1628 .shaderRelaxedExtendedInstruction = 1, 1629 }; 1630 if (vulkan_config.debug.shader_relaxed_extended_instruction) { 1631 pdsre.pNext = (void *)device_create_info.pNext; 1632 device_create_info.pNext = &pdsre; 1633 } 1634 1635 VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_mat_features = { 1636 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR, 1637 .cooperativeMatrix = 1, 1638 .cooperativeMatrixRobustBufferAccess = 0, 1639 }; 1640 if (vk->gpu_info.cooperative_matrix) { 1641 coop_mat_features.pNext = (void *)device_create_info.pNext; 1642 device_create_info.pNext = &coop_mat_features; 1643 } 1644 1645 VkPhysicalDeviceRobustness2FeaturesKHR robust2 = { 1646 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_KHR, 1647 .pNext = (void *)device_create_info.pNext, 1648 .nullDescriptor = 1, 1649 }; 1650 device_create_info.pNext = &robust2; 1651 1652 VkPhysicalDeviceVulkan13Features v13f = { 1653 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, 1654 .pNext = (void *)device_create_info.pNext, 1655 #define X(name, ...) .name = 1, 1656 VK_REQUIRED_PHYSICAL_13_FEATURES 1657 #undef X 1658 }; 1659 device_create_info.pNext = &v13f; 1660 1661 VkPhysicalDeviceVulkan12Features v12f = { 1662 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, 1663 .pNext = (void *)device_create_info.pNext, 1664 #define X(name, ...) 
.name = 1, 1665 VK_REQUIRED_PHYSICAL_12_FEATURES 1666 #undef X 1667 }; 1668 device_create_info.pNext = &v12f; 1669 1670 VkPhysicalDeviceVulkan11Features v11f = { 1671 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, 1672 .pNext = (void *)device_create_info.pNext, 1673 #define X(name, ...) .name = 1, 1674 VK_REQUIRED_PHYSICAL_11_FEATURES 1675 #undef X 1676 }; 1677 device_create_info.pNext = &v11f; 1678 1679 VkPhysicalDeviceFeatures2 device_features = { 1680 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, 1681 .pNext = (void *)device_create_info.pNext, 1682 .features = { 1683 #define X(name, ...) .name = 1, 1684 VK_REQUIRED_PHYSICAL_FEATURES 1685 #undef X 1686 }, 1687 }; 1688 device_create_info.pNext = &device_features; 1689 1690 vkCreateDevice(vk->physical_device, &device_create_info, 0, &vk->device); 1691 1692 #define X(name, ...) name = (name##_fn *)vkGetDeviceProcAddr(vk->device, #name); 1693 VkDeviceProcedureList 1694 #undef X 1695 1696 for (u32 q = 0; q < vk->unique_queues; q++) { 1697 VulkanQueue *qp = vk->queues[q]; 1698 vkGetDeviceQueue(vk->device, qp->queue_family, qp->queue_index, &qp->queue); 1699 1700 qp->timeline_semaphore = vk_make_semaphore(0); 1701 } 1702 1703 vk->queues[VulkanQueueKind_Graphics]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; 1704 vk->queues[VulkanQueueKind_Compute]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; 1705 1706 for EachElement(vk->command_pools, it) { 1707 VulkanCommandPool *vcp = vk->command_pools[it]; 1708 1709 VkCommandPoolCreateInfo command_pool_create_info = { 1710 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, 1711 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, 1712 .queueFamilyIndex = vk->queues[it]->queue_family, 1713 }; 1714 1715 vkCreateCommandPool(vk->device, &command_pool_create_info, 0, &vcp->handle); 1716 1717 VkCommandBufferAllocateInfo command_buffer_allocate_info = { 1718 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, 1719 
.commandPool = vcp->handle, 1720 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1721 .commandBufferCount = countof(vcp->buffers), 1722 }; 1723 vkAllocateCommandBuffers(vk->device, &command_buffer_allocate_info, vcp->buffers); 1724 1725 VkQueryPoolCreateInfo query_pool_create_info = { 1726 .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, 1727 .queryType = VK_QUERY_TYPE_TIMESTAMP, 1728 .queryCount = MaxCommandBuffersInFlight * MaxCommandBufferTimestamps, 1729 }; 1730 vkCreateQueryPool(vk->device, &query_pool_create_info, 0, &vcp->query_pool); 1731 } 1732 } 1733 1734 function void 1735 vk_load_graphics(void) 1736 { 1737 VulkanContext *vk = vulkan_context; 1738 1739 // NOTE: swap chain image format 1740 { 1741 } 1742 1743 // NOTE: depth/stencil format 1744 { 1745 VkFormat depth_formats[] = { 1746 VK_FORMAT_D32_SFLOAT_S8_UINT, 1747 VK_FORMAT_D24_UNORM_S8_UINT, 1748 VK_FORMAT_D16_UNORM_S8_UINT, 1749 }; 1750 1751 vk->depth_stencil_format = VK_FORMAT_UNDEFINED; 1752 for EachElement(depth_formats, it) { 1753 VkFormatProperties3 format_properties3 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3}; 1754 VkFormatProperties2 format_properties2 = { 1755 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, 1756 .pNext = &format_properties3, 1757 }; 1758 vkGetPhysicalDeviceFormatProperties2(vk->physical_device, depth_formats[it], &format_properties2); 1759 if (format_properties3.optimalTilingFeatures & VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT) { 1760 vk->depth_stencil_format = depth_formats[it]; 1761 break; 1762 } 1763 } 1764 } 1765 } 1766 1767 function void 1768 vk_load_descriptor_block(void) 1769 { 1770 // NOTE(rnp): 1771 // * One Descriptor Pool 1772 // * One Descriptor Set Per Resource Kind 1773 // * Shaders know the ResourceKind enumeration 1774 // * Shaders know the per set binding points 1775 1776 VulkanContext *vk = vulkan_context; 1777 1778 // NOTE(rnp): Pool 1779 VkDescriptorPoolSize pool_sizes[] = { 1780 { 1781 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1782 
.descriptorCount = BeamformerShaderBufferSlot_Count, 1783 }, 1784 }; 1785 static_assert(countof(pool_sizes) == BeamformerShaderResourceKind_Count, ""); 1786 1787 VkDescriptorPoolCreateInfo pool_create_info = { 1788 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, 1789 .maxSets = BeamformerShaderResourceKind_Count, 1790 .poolSizeCount = countof(pool_sizes), 1791 .pPoolSizes = pool_sizes, 1792 }; 1793 1794 vkCreateDescriptorPool(vk->device, &pool_create_info, 0, &vk->descriptor_pool); 1795 1796 // NOTE(rnp): Set Layouts 1797 VkDescriptorSetLayoutCreateInfo layout_create_info = { 1798 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 1799 }; 1800 1801 { 1802 VkDescriptorSetLayoutBinding layout_bindings[BeamformerShaderBufferSlot_Count]; 1803 for EachEnumValue(BeamformerShaderBufferSlot, it) { 1804 layout_bindings[it] = (VkDescriptorSetLayoutBinding){ 1805 .binding = it, 1806 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1807 .descriptorCount = 1, 1808 .stageFlags = VK_SHADER_STAGE_ALL, 1809 }; 1810 } 1811 layout_create_info.bindingCount = countof(layout_bindings), 1812 layout_create_info.pBindings = layout_bindings, 1813 vkCreateDescriptorSetLayout(vk->device, &layout_create_info, 0, 1814 vk->descriptor_set_layouts + BeamformerShaderResourceKind_Buffer); 1815 } 1816 1817 // NOTE(rnp): Sets 1818 VkDescriptorSetAllocateInfo set_allocate_info = { 1819 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 1820 .descriptorPool = vk->descriptor_pool, 1821 .descriptorSetCount = countof(vk->descriptor_sets), 1822 .pSetLayouts = vk->descriptor_set_layouts, 1823 }; 1824 static_assert(countof(vk->descriptor_set_layouts) == countof(vk->descriptor_sets), ""); 1825 vkAllocateDescriptorSets(vk->device, &set_allocate_info, vk->descriptor_sets); 1826 1827 vk_label_object(DESCRIPTOR_POOL, vk->descriptor_pool, s8("Beamformer Resources"), s8("Pool")); 1828 1829 DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) { 1830 Arena scratch = 
vk->arena; 1831 for EachElement(vk->descriptor_sets, it) { 1832 Stream sb = arena_stream(scratch); 1833 stream_append_s8s(&sb, s8("Beamformer "), beamformer_shader_resource_kind_strings[it], s8("s")); 1834 vk_label_object(DESCRIPTOR_SET, vk->descriptor_sets[it], stream_to_s8(&sb), s8("Set")); 1835 vk_label_object(DESCRIPTOR_SET_LAYOUT, vk->descriptor_set_layouts[it], stream_to_s8(&sb), s8("Set Layout")); 1836 } 1837 } 1838 1839 // NOTE(rnp): junk API requirement that doesn't allow 0 initialization 1840 for EachElement(vk->descriptor_buffer_infos, it) 1841 vk->descriptor_buffer_infos[it].range = VK_WHOLE_SIZE; 1842 } 1843 1844 /////////////////////// 1845 // NOTE(rnp): User API 1846 1847 DEBUG_IMPORT void 1848 vk_load(OSLibrary vulkan_library_handle, Arena *memory, Stream *err) 1849 { 1850 #define X(name, ...) name = (name##_fn *)os_lookup_symbol(vulkan_library_handle, #name); 1851 VkLoaderProcedureList 1852 #undef X 1853 1854 if (!vkGetInstanceProcAddr) { 1855 stream_append_s8(err, vulkan_info("fatal error: failed to find \"vkGetInstanceProcAddr\"\n")); 1856 fatal(stream_to_s8(err)); 1857 } 1858 1859 VulkanContext *vk = vulkan_context; 1860 vk->entity_arena = sub_arena_end(memory, KB(64), KB(4)); 1861 vk->arena = sub_arena_end(memory, KB(96), KB(4)); 1862 1863 vk_load_instance(vk->arena, err); 1864 vk_load_physical_device(vk->arena, err); 1865 vk_load_queues(&vk->arena, err); 1866 vk_load_graphics(); 1867 vk_load_descriptor_block(); 1868 1869 read_only local_persist s8 default_compute_shader = s8("" 1870 "#version 430 core\n" 1871 "layout(push_constant) uniform pc { uint data[256 / 4]; };\n" 1872 "void main() {}\n" 1873 "\n"); 1874 vk->default_compute_pipeline = vk_compute_pipeline_from_shader_text(vk->arena, default_compute_shader, 1875 s8("error_compute_shader"), 256); 1876 1877 read_only local_persist s8 default_vertex_shader = s8("" 1878 "#version 430 core\n" 1879 "layout(push_constant) uniform pc { uint data[256 / 4]; };\n" 1880 "void main() {gl_Position = 
vec4(0);}\n" 1881 "\n"); 1882 read_only local_persist s8 default_fragment_shader = s8("" 1883 "#version 430 core\n" 1884 "layout(location = 0) out vec4 out_colour;" 1885 "layout(push_constant) uniform pc { uint data[256 / 4]; };\n" 1886 "void main() {out_colour = vec4(0.5f, 0.0f, 0.5f, 1.0f);}\n" 1887 "\n"); 1888 1889 VulkanPipelineCreateInfo pipeline_create_infos[2] = { 1890 { 1891 .kind = VulkanShaderKind_Vertex, 1892 .text = default_vertex_shader, 1893 .name = s8("error_vertex_shader"), 1894 }, 1895 { 1896 .kind = VulkanShaderKind_Fragment, 1897 .text = default_fragment_shader, 1898 .name = s8("error_fragment_shader"), 1899 }, 1900 }; 1901 vk->default_graphics_pipeline = vk_graphics_pipeline_from_infos(vk->arena, pipeline_create_infos, 2, 256); 1902 1903 // TODO: setup ui render pipeline 1904 1905 if (err->widx > 0) { 1906 os_console_log(err->data, err->widx); 1907 stream_reset(err, 0); 1908 } 1909 } 1910 1911 DEBUG_IMPORT GPUInfo * 1912 vk_gpu_info(void) 1913 { 1914 return &vulkan_context->gpu_info; 1915 } 1916 1917 function void 1918 vk_vulkan_buffer_release(VulkanBuffer *vb) 1919 { 1920 VulkanContext *vk = vulkan_context; 1921 VulkanEntity *e = (VulkanEntity *)((u8 *)vb - offsetof(VulkanEntity, as)); 1922 // TODO(rnp): this happens implicitly, probably just delete this if block 1923 if (vb->host_pointer) 1924 vkUnmapMemory(vk->device, vb->memory); 1925 1926 if (vb->buffer) 1927 vkDestroyBuffer(vk->device, vb->buffer, 0); 1928 1929 vk_release_memory(vb->memory, vb->memory_kind != VulkanMemoryKind_Host ? 
vb->memory_size : 0); 1930 vk_entity_release(e); 1931 } 1932 1933 DEBUG_IMPORT void 1934 vk_buffer_release(GPUBuffer *b) 1935 { 1936 if ValidVulkanHandle(b->handle) 1937 vk_vulkan_buffer_release(vk_entity_data(b->handle, VulkanEntityKind_Buffer)); 1938 zero_struct(b); 1939 } 1940 1941 DEBUG_IMPORT void 1942 vk_buffer_allocate(GPUBuffer *b, GPUBufferAllocateInfo *info) 1943 { 1944 VulkanContext *vk = vulkan_context; 1945 1946 vk_buffer_release(b); 1947 1948 assert(info->size > 0); 1949 1950 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Buffer); 1951 VulkanBufferAllocateInfo vulkan_buffer_allocate_info = { 1952 .gpu_buffer = b, 1953 .size = (u64)info->size, 1954 .flags = info->flags, 1955 .index_type = VK_INDEX_TYPE_NONE_KHR, 1956 .label = info->label, 1957 }; 1958 1959 u32 queue_index_hit_count[VulkanQueueKind_Count] = {0}; 1960 for (u32 it = 0; it < info->timeline_count; it++) 1961 queue_index_hit_count[vk->queue_indices[info->timelines_used[it]]]++; 1962 1963 for EachElement(queue_index_hit_count, it) { 1964 if (queue_index_hit_count[it] > 0) { 1965 u32 index = vulkan_buffer_allocate_info.queue_family_count++; 1966 vulkan_buffer_allocate_info.queue_family_indices[index] = vk->queues[vk->queue_indices[it]]->queue_family; 1967 } 1968 } 1969 1970 if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) { 1971 b->handle.value[0] = (u64)e; 1972 } else { 1973 vk_entity_release(e); 1974 } 1975 } 1976 1977 DEBUG_IMPORT b32 1978 vk_buffer_needs_sync(GPUBuffer *b) 1979 { 1980 b32 result = 0; 1981 if ValidVulkanHandle(b->handle) { 1982 VulkanBuffer *vb = vk_entity_data(b->handle, VulkanEntityKind_Buffer); 1983 1984 // TODO(rnp): not correct check. 
need to check if we used transfer queue 1985 result = vb->memory_kind != VulkanMemoryKind_BAR; 1986 } 1987 1988 return result; 1989 } 1990 1991 DEBUG_IMPORT u64 1992 vk_round_up_to_sync_size(u64 size, u64 min) 1993 { 1994 iz round = (iz)Max(min, vulkan_context->memory_info.non_coherent_atom_size); 1995 u64 result = (u64)round_up_to((iz)size, round); 1996 return result; 1997 } 1998 1999 function force_inline void 2000 vk_buffer_buffer_copy(VulkanBuffer *destination, VulkanBuffer *source, u64 destination_offset, u64 source_offset, u64 size, b32 non_temporal) 2001 { 2002 VulkanContext *vk = vulkan_context; 2003 2004 switch (source->memory_kind) { 2005 case VulkanMemoryKind_BAR: 2006 { 2007 switch (destination->memory_kind) { 2008 case VulkanMemoryKind_Host:{ 2009 if (destination->memory) { 2010 // TODO(rnp): there is likely a more efficient way of doing this in this case 2011 InvalidCodePath; 2012 } else { 2013 assert(source->host_pointer); 2014 b32 coherent = vk->memory_info.memory_host_coherent[source->memory_kind]; 2015 if (!coherent) { 2016 u64 nca_size = vk->memory_info.non_coherent_atom_size; 2017 VkMappedMemoryRange mrs[1] = {{ 2018 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 2019 .memory = source->memory, 2020 .offset = source_offset - (source_offset % nca_size), 2021 .size = vk_round_up_to_sync_size(size, nca_size), 2022 }}; 2023 vkInvalidateMappedMemoryRanges(vk->device, countof(mrs), mrs); 2024 } 2025 2026 void *dest = (u8 *)destination->host_pointer + destination_offset; 2027 void *src = (u8 *)source->host_pointer + source_offset; 2028 2029 // NOTE(rnp): don't trash the CPU cache for large data stores 2030 if (non_temporal) memory_copy_non_temporal(dest, src, size); 2031 else mem_copy(dest, src, size); 2032 } 2033 }break; 2034 InvalidDefaultCase; 2035 } 2036 }break; 2037 2038 case VulkanMemoryKind_Host:{ 2039 switch (destination->memory_kind) { 2040 case VulkanMemoryKind_BAR:{ 2041 assert(destination->host_pointer); 2042 2043 void *dest = (u8 
*)destination->host_pointer + destination_offset; 2044 void *src = (u8 *)source->host_pointer + source_offset; 2045 2046 // NOTE(rnp): don't trash the CPU cache for large data stores 2047 if (non_temporal) memory_copy_non_temporal(dest, src, size); 2048 else mem_copy(dest, src, size); 2049 2050 b32 coherent = vk->memory_info.memory_host_coherent[destination->memory_kind]; 2051 if (!coherent) { 2052 u64 nca_size = vk->memory_info.non_coherent_atom_size; 2053 VkMappedMemoryRange mrs[1] = {{ 2054 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 2055 .memory = destination->memory, 2056 .offset = destination_offset - (destination_offset % nca_size), 2057 .size = vk_round_up_to_sync_size(size, nca_size), 2058 }}; 2059 vkFlushMappedMemoryRanges(vk->device, countof(mrs), mrs); 2060 } 2061 }break; 2062 InvalidDefaultCase; 2063 2064 } 2065 }break; 2066 2067 // TODO(rnp): use transfer queue when not mapped 2068 InvalidDefaultCase; 2069 } 2070 } 2071 2072 DEBUG_IMPORT void 2073 vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_temporal) 2074 { 2075 VulkanBuffer *db = vk_entity_data(b->handle, VulkanEntityKind_Buffer); 2076 VulkanBuffer sb = { 2077 .host_pointer = data, 2078 .memory_kind = VulkanMemoryKind_Host, 2079 }; 2080 vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal); 2081 } 2082 2083 DEBUG_IMPORT void 2084 vk_buffer_range_download(void *destination, GPUBuffer *source, u64 offset, u64 size, b32 non_temporal) 2085 { 2086 VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer); 2087 VulkanBuffer db = { 2088 .host_pointer = destination, 2089 .memory_kind = VulkanMemoryKind_Host, 2090 }; 2091 vk_buffer_buffer_copy(&db, sb, 0, offset, size, non_temporal); 2092 } 2093 2094 DEBUG_IMPORT void 2095 vk_render_model_release(GPUBuffer *model) 2096 { 2097 if ValidVulkanHandle(model->handle) 2098 vk_vulkan_buffer_release(vk_entity_data(model->handle, VulkanEntityKind_RenderModel)); 2099 zero_struct(model); 2100 } 2101 2102 
DEBUG_IMPORT void 2103 vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 model_size, s8 label) 2104 { 2105 vk_render_model_release(model); 2106 2107 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_RenderModel); 2108 2109 assert(index_count <= U32_MAX); 2110 VkIndexType index_type; 2111 if (index_count <= U16_MAX) index_type = VK_INDEX_TYPE_UINT16; 2112 else index_type = VK_INDEX_TYPE_UINT32; 2113 2114 i64 indices_size = round_up_to(vk_index_size(index_type) * index_count, 64); 2115 2116 i64 size = round_up_to(model_size + indices_size, 64); 2117 assert(size > 0); 2118 2119 VulkanBufferAllocateInfo vulkan_buffer_allocate_info = { 2120 .gpu_buffer = model, 2121 .size = (u64)size, 2122 .flags = VulkanUsageFlag_HostReadWrite, 2123 .index_type = index_type, 2124 .label = label, 2125 .queue_family_count = 1, 2126 .queue_family_indices[0] = vulkan_context->queues[VulkanQueueKind_Graphics]->queue_family, 2127 }; 2128 if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) { 2129 model->handle.value[0] = (u64)e; 2130 model->index_count = index_count; 2131 model->gpu_pointer += indices_size; 2132 2133 VulkanBuffer sb = { 2134 .host_pointer = indices, 2135 .memory_kind = VulkanMemoryKind_Host, 2136 }; 2137 2138 vk_buffer_buffer_copy(&e->as.buffer, &sb, 0, 0, vk_index_size(index_type) * index_count, 0); 2139 } else { 2140 vk_entity_release(e); 2141 } 2142 } 2143 2144 DEBUG_IMPORT void 2145 vk_render_model_range_upload(GPUBuffer *model, void *data, u64 offset, u64 size, b32 non_temporal) 2146 { 2147 VulkanBuffer *db = vk_entity_data(model->handle, VulkanEntityKind_RenderModel); 2148 VulkanBuffer sb = { 2149 .host_pointer = data, 2150 .memory_kind = VulkanMemoryKind_Host, 2151 }; 2152 2153 offset += round_up_to(vk_index_size(db->index_type) * model->index_count, 64); 2154 2155 vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal); 2156 } 2157 2158 DEBUG_IMPORT void 2159 vk_image_release(GPUImage *image) 2160 { 2161 
if ValidVulkanHandle(image->image) { 2162 VulkanContext *vk = vulkan_context; 2163 VulkanImage *vi = vk_entity_data(image->image, VulkanEntityKind_Image); 2164 2165 vkDestroyImageView(vk->device, vi->view, 0); 2166 vkDestroyImage(vk->device, vi->image, 0); 2167 vk_release_memory(vi->memory, image->memory_size); 2168 2169 vk_entity_release((VulkanEntity *)image->image.value[0]); 2170 } 2171 zero_struct(image); 2172 } 2173 2174 DEBUG_IMPORT void 2175 vk_image_allocate(GPUImage *image, u32 width, u32 height, u32 mips, u32 samples, 2176 VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export, s8 label) 2177 { 2178 assert(IsPowerOfTwo(samples)); 2179 2180 vk_image_release(image); 2181 2182 VulkanContext *vk = vulkan_context; 2183 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Image); 2184 VulkanImage *vi = &e->as.image; 2185 2186 image->image.value[0] = (u64)e; 2187 image->width = Min(width, vk->gpu_info.max_image_dimension_2D); 2188 image->height = Min(height, vk->gpu_info.max_image_dimension_2D); 2189 image->mip_map_levels = Max(mips, 1); 2190 image->samples = Min(samples, vk->gpu_info.max_msaa_samples); 2191 2192 VkFormat usage_format_map[VulkanImageUsage_Count + 1] = { 2193 [VulkanImageUsage_None] = VK_FORMAT_UNDEFINED, 2194 //[VulkanImageUsage_Colour] = VK_FORMAT_R8G8B8A8_SRGB, 2195 [VulkanImageUsage_Colour] = VK_FORMAT_R8G8B8A8_UNORM, 2196 [VulkanImageUsage_DepthStencil] = vk->depth_stencil_format, 2197 [VulkanImageUsage_Count] = VK_FORMAT_UNDEFINED, 2198 }; 2199 2200 read_only local_persist VkImageUsageFlagBits usage_extra_bit_map[VulkanImageUsage_Count + 1] = { 2201 [VulkanImageUsage_None] = 0, 2202 [VulkanImageUsage_Colour] = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 2203 [VulkanImageUsage_DepthStencil] = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 2204 [VulkanImageUsage_Count] = 0, 2205 }; 2206 2207 read_only local_persist VkImageAspectFlags usage_image_aspect_map[VulkanImageUsage_Count + 1] = { 2208 [VulkanImageUsage_None] = 0, 2209 
[VulkanImageUsage_Colour] = VK_IMAGE_ASPECT_COLOR_BIT, 2210 [VulkanImageUsage_DepthStencil] = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT, 2211 [VulkanImageUsage_Count] = 0, 2212 }; 2213 2214 usage = Clamp((u32)usage, 0, VulkanImageUsage_Count); 2215 VkImageUsageFlagBits usage_flags = usage_extra_bit_map[usage]; 2216 2217 if (flags & VulkanUsageFlag_ImageSampling) usage_flags |= VK_IMAGE_USAGE_SAMPLED_BIT; 2218 if (flags & VulkanUsageFlag_TransferSource) usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; 2219 if (flags & VulkanUsageFlag_TransferDestination) usage_flags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; 2220 2221 u32 queue_family = vk->queues[VulkanQueueKind_Graphics]->queue_family; 2222 VkImageCreateInfo image_create_info = { 2223 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 2224 .flags = export ? VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT : 0, 2225 .imageType = VK_IMAGE_TYPE_2D, 2226 .format = usage_format_map[usage], 2227 .extent = {image->width, image->height, 1}, 2228 .mipLevels = image->mip_map_levels, 2229 .arrayLayers = 1, 2230 .samples = image->samples, 2231 .tiling = VK_IMAGE_TILING_OPTIMAL, 2232 .usage = usage_flags, 2233 // NOTE(rnp): needed if multiple queue families are accessed 2234 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 2235 .queueFamilyIndexCount = 1, 2236 .pQueueFamilyIndices = &queue_family, 2237 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2238 }; 2239 2240 VkExternalMemoryImageCreateInfo external_memory_image_create_info = { 2241 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, 2242 .handleTypes = OS_WINDOWS ? 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT 2243 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 2244 }; 2245 2246 if (export) image_create_info.pNext = &external_memory_image_create_info; 2247 2248 vkCreateImage(vk->device, &image_create_info, 0, &vi->image); 2249 2250 VkMemoryRequirements memory_requirements; 2251 vkGetImageMemoryRequirements(vk->device, vi->image, &memory_requirements); 2252 2253 VkMemoryDedicatedAllocateInfo dedicated_allocate_info = { 2254 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, 2255 .image = vi->image, 2256 }; 2257 2258 if (vk_allocate_memory(&vi->memory, memory_requirements.size, VulkanMemoryKind_Device, 0, &dedicated_allocate_info, export)) { 2259 image->memory_size = memory_requirements.size; 2260 vkBindImageMemory(vk->device, vi->image, vi->memory, 0); 2261 2262 VkImageViewCreateInfo image_view_info = { 2263 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 2264 .image = vi->image, 2265 .viewType = VK_IMAGE_VIEW_TYPE_2D, 2266 .format = usage_format_map[usage], 2267 .subresourceRange = { 2268 .aspectMask = usage_image_aspect_map[usage], 2269 .baseMipLevel = 0, 2270 .levelCount = 1, 2271 .baseArrayLayer = 0, 2272 .layerCount = 1, 2273 }, 2274 }; 2275 vkCreateImageView(vk->device, &image_view_info, 0, &vi->view); 2276 2277 vk_label_object(IMAGE, vi->image, label, s8("Image")); 2278 vk_label_object(IMAGE_VIEW, vi->view, label, s8("Image View")); 2279 vk_label_object(DEVICE_MEMORY, vi->memory, label, s8("Memory")); 2280 } else { 2281 vkDestroyImage(vk->device, vi->image, 0); 2282 vk_entity_release(e); 2283 zero_struct(image); 2284 } 2285 } 2286 2287 DEBUG_IMPORT VulkanHandle 2288 vk_create_semaphore(OSHandle *export) 2289 { 2290 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Semaphore); 2291 e->as.semaphore = vk_make_semaphore(export); 2292 VulkanHandle result = {(u64)e}; 2293 return result; 2294 } 2295 2296 DEBUG_IMPORT b32 2297 vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns) 2298 { 
2299 b32 result = 0; 2300 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2301 VulkanContext *vk = vulkan_context; 2302 VulkanQueue *vq = vk->queues[timeline]; 2303 VkSemaphoreWaitInfo semaphore_wait_info = { 2304 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, 2305 .pSemaphores = &vq->timeline_semaphore.semaphore, 2306 .semaphoreCount = 1, 2307 .pValues = &value, 2308 }; 2309 result = vkWaitSemaphores(vk->device, &semaphore_wait_info, timeout_ns) == VK_SUCCESS; 2310 } 2311 return result; 2312 } 2313 2314 DEBUG_IMPORT u64 2315 vk_host_signal_timeline(VulkanTimeline timeline) 2316 { 2317 u64 result = -1; 2318 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2319 VulkanContext *vk = vulkan_context; 2320 VulkanQueue *vq = vk->queues[timeline]; 2321 VulkanSemaphore *vs = &vq->timeline_semaphore; 2322 result = ++vs->value; 2323 VkSemaphoreSignalInfo ssi = { 2324 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, 2325 .semaphore = vs->semaphore, 2326 .value = result, 2327 }; 2328 vkSignalSemaphore(vk->device, &ssi); 2329 } 2330 return result; 2331 } 2332 2333 DEBUG_IMPORT VulkanHandle 2334 vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size) 2335 { 2336 assert(Between(count, 1, 2)); 2337 assert(count == 2 || infos[0].kind == VulkanShaderKind_Compute); 2338 2339 VulkanHandle result = {0}; 2340 DeferLoop(take_lock(&vulkan_context->arena_lock, -1), release_lock(&vulkan_context->arena_lock)) 2341 { 2342 Arena arena = vulkan_context->arena; 2343 2344 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Pipeline); 2345 result = (VulkanHandle){(u64)e}; 2346 2347 if (count == 2) e->as.pipeline = vk_graphics_pipeline_from_infos(arena, infos, count, push_constants_size); 2348 else e->as.pipeline = vk_compute_pipeline_from_shader_text(arena, infos[0].text, infos[0].name, push_constants_size); 2349 } 2350 return result; 2351 } 2352 2353 DEBUG_IMPORT b32 2354 vk_pipeline_valid(VulkanHandle h) 2355 { 2356 b32 result = 0; 2357 if ValidVulkanHandle(h) 
{ 2358 VulkanPipeline *vp = vk_entity_data(h, VulkanEntityKind_Pipeline); 2359 if (vp->stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) 2360 result = vp->pipeline != vulkan_context->default_compute_pipeline.pipeline; 2361 else 2362 result = vp->pipeline != vulkan_context->default_graphics_pipeline.pipeline; 2363 } 2364 return result; 2365 } 2366 2367 DEBUG_IMPORT void 2368 vk_pipeline_release(VulkanHandle h) 2369 { 2370 if (vk_pipeline_valid(h)) { 2371 VulkanEntity *e = (VulkanEntity *)h.value[0]; 2372 VulkanTimeline timeline; 2373 if (e->as.pipeline.stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) timeline = VulkanTimeline_Compute; 2374 else timeline = VulkanTimeline_Graphics; 2375 2376 // NOTE(rnp): block more command buffers from being recorded 2377 VulkanCommandPool *vcp = vulkan_context->command_pools[timeline]; 2378 DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) { 2379 u32 index = (vcp->next_index - 1) % countof(vcp->buffers); 2380 vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL); 2381 vkDestroyPipeline(vulkan_context->device, e->as.pipeline.pipeline, 0); 2382 vkDestroyPipelineLayout(vulkan_context->device, e->as.pipeline.layout, 0); 2383 2384 if (&e->as.pipeline == vcp->bound_pipeline) 2385 vcp->bound_pipeline = 0; 2386 } 2387 vk_entity_release(e); 2388 } 2389 } 2390 2391 DEBUG_IMPORT void 2392 vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count) 2393 { 2394 VulkanContext *vk = vulkan_context; 2395 2396 VkWriteDescriptorSet write_sets[BeamformerShaderResourceKind_Count] = {0}; 2397 2398 for EachIndex(info_count, it) { 2399 switch (infos[it].kind) { 2400 case BeamformerShaderResourceKind_Buffer:{ 2401 VulkanBuffer *vb = vk_entity_data(infos[it].handle, VulkanEntityKind_Buffer); 2402 vk->descriptor_buffer_infos[infos[it].slot].buffer = vb->buffer; 2403 vk->descriptor_buffer_infos[infos[it].slot].offset = 0; 2404 vk->descriptor_buffer_infos[infos[it].slot].range = vb->memory_size; 2405 }break; 2406 2407 
InvalidDefaultCase; 2408 } 2409 } 2410 2411 write_sets[BeamformerShaderResourceKind_Buffer].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; 2412 write_sets[BeamformerShaderResourceKind_Buffer].dstSet = vk->descriptor_sets[BeamformerShaderResourceKind_Buffer]; 2413 write_sets[BeamformerShaderResourceKind_Buffer].dstBinding = 0; 2414 write_sets[BeamformerShaderResourceKind_Buffer].descriptorCount = countof(vk->descriptor_buffer_infos); 2415 write_sets[BeamformerShaderResourceKind_Buffer].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 2416 write_sets[BeamformerShaderResourceKind_Buffer].pBufferInfo = vk->descriptor_buffer_infos; 2417 2418 vkUpdateDescriptorSets(vk->device, countof(write_sets), write_sets, 0, 0); 2419 } 2420 2421 DEBUG_IMPORT VulkanHandle 2422 vk_command_begin(VulkanTimeline timeline) 2423 { 2424 VulkanHandle result = {0}; 2425 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2426 VulkanContext *vk = vulkan_context; 2427 VulkanCommandPool *vcp = vk->command_pools[timeline]; 2428 2429 take_lock(&vcp->lock, -1); 2430 2431 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_CommandBuffer); 2432 VulkanCommandBuffer *vcb = &e->as.command_buffer; 2433 vcb->timeline = timeline; 2434 vcb->buffer_index = vcp->next_index++ % countof(vcp->buffers); 2435 2436 u32 index = vcb->buffer_index; 2437 // TODO(rnp): probably not the best to have this here but it will likely not be hit 2438 b32 wait_result = vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL); 2439 assert(wait_result); 2440 2441 vcp->queries_occupied[index] = 0; 2442 2443 VkCommandBufferBeginInfo buffer_begin_info = { 2444 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 2445 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 2446 }; 2447 2448 vkBeginCommandBuffer(vcp->buffers[index], &buffer_begin_info); 2449 vkCmdResetQueryPool(vcp->buffers[index], vcp->query_pool, index * MaxCommandBufferTimestamps, 2450 MaxCommandBufferTimestamps); 2451 2452 result = 
(VulkanHandle){(u64)e}; 2453 } 2454 return result; 2455 } 2456 2457 DEBUG_IMPORT void 2458 vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline) 2459 { 2460 if ValidVulkanHandle(command) { 2461 VulkanContext *vk = vulkan_context; 2462 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2463 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2464 2465 VulkanPipeline *vp = 0; 2466 if ValidVulkanHandle(pipeline) { 2467 vp = vk_entity_data(pipeline, VulkanEntityKind_Pipeline); 2468 } else if (vcb->timeline == VulkanTimeline_Compute) { 2469 vp = &vk->default_compute_pipeline; 2470 } else if (vcb->timeline == VulkanTimeline_Graphics) { 2471 vp = &vk->default_graphics_pipeline; 2472 } else { 2473 InvalidCodePath; 2474 } 2475 2476 read_only local_persist VkPipelineBindPoint bind_point_lut[VulkanTimeline_Count] = { 2477 [VulkanTimeline_Graphics] = VK_PIPELINE_BIND_POINT_GRAPHICS, 2478 [VulkanTimeline_Compute] = VK_PIPELINE_BIND_POINT_COMPUTE, 2479 [VulkanTimeline_Transfer] = -1, 2480 }; 2481 2482 VkPipelineBindPoint bind_point = bind_point_lut[vcb->timeline]; 2483 assert(bind_point != (VkPipelineBindPoint)-1); 2484 2485 vkCmdBindPipeline(vcp->buffers[vcb->buffer_index], bind_point, vp->pipeline); 2486 vkCmdBindDescriptorSets(vcp->buffers[vcb->buffer_index], bind_point, vp->layout, 2487 0, countof(vk->descriptor_sets), vk->descriptor_sets, 0, 0); 2488 vcp->bound_pipeline = vp; 2489 } 2490 } 2491 2492 DEBUG_IMPORT void 2493 vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count) 2494 { 2495 if ValidVulkanHandle(command) { 2496 VulkanContext *vk = vulkan_context; 2497 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2498 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2499 VulkanQueue *vq = vk->queues[vcb->timeline]; 2500 2501 DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) 2502 { 2503 Arena arena = vk->arena; 
2504 u32 valid_count = 0; 2505 VkBufferMemoryBarrier2 *memory_barriers = push_array(&arena, VkBufferMemoryBarrier2, count); 2506 for (u64 it = 0; it < count; it++) { 2507 if ValidVulkanHandle(barriers[it].gpu_buffer->handle) { 2508 u32 index = valid_count++; 2509 VulkanBuffer *vb = vk_entity_data(barriers[it].gpu_buffer->handle, VulkanEntityKind_Buffer); 2510 memory_barriers[index].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2; 2511 memory_barriers[index].srcStageMask = vq->pipeline_stage_flags; 2512 memory_barriers[index].srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT; 2513 memory_barriers[index].dstStageMask = vq->pipeline_stage_flags; 2514 memory_barriers[index].dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT; 2515 memory_barriers[index].srcQueueFamilyIndex = vq->queue_family; 2516 memory_barriers[index].dstQueueFamilyIndex = vq->queue_family; 2517 memory_barriers[index].buffer = vb->buffer; 2518 memory_barriers[index].offset = barriers[it].offset; 2519 memory_barriers[index].size = barriers[it].size; 2520 } 2521 } 2522 2523 VkDependencyInfo dependancy_info = { 2524 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, 2525 .bufferMemoryBarrierCount = valid_count, 2526 .pBufferMemoryBarriers = memory_barriers, 2527 }; 2528 2529 vkCmdPipelineBarrier2(vcp->buffers[vcb->buffer_index], &dependancy_info); 2530 } 2531 } 2532 } 2533 2534 DEBUG_IMPORT void 2535 vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch) 2536 { 2537 assert(dispatch.x <= U16_MAX); 2538 assert(dispatch.y <= U16_MAX); 2539 assert(dispatch.z <= U16_MAX); 2540 if ValidVulkanHandle(command) { 2541 VkCommandBuffer cmd = vk_command_buffer(command); 2542 vkCmdDispatch(cmd, dispatch.x, dispatch.y, dispatch.z); 2543 } 2544 } 2545 2546 DEBUG_IMPORT void 2547 vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values) 2548 { 2549 if ValidVulkanHandle(command) { 2550 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2551 VulkanCommandPool *vcp = 
vulkan_context->command_pools[vcb->timeline]; 2552 VulkanPipeline *vp = vcp->bound_pipeline; 2553 2554 assert(vp); 2555 2556 vkCmdPushConstants(vcp->buffers[vcb->buffer_index], vp->layout, vp->stage_flags, offset, size, values); 2557 } 2558 } 2559 2560 DEBUG_IMPORT void 2561 vk_command_timestamp(VulkanHandle command) 2562 { 2563 if ValidVulkanHandle(command) { 2564 VulkanContext *vk = vulkan_context; 2565 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2566 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2567 2568 read_only local_persist VkPipelineStageFlags2 stage_lut[VulkanTimeline_Count] = { 2569 [VulkanTimeline_Graphics] = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT, 2570 [VulkanTimeline_Compute] = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, 2571 [VulkanTimeline_Transfer] = -1, 2572 }; 2573 2574 VkPipelineStageFlags2 stage = stage_lut[vcb->timeline]; 2575 assert(stage != (VkPipelineStageFlags2)-1); 2576 2577 if (vcp->queries_occupied[vcb->buffer_index] < MaxCommandBufferTimestamps) { 2578 u32 query_index = vcp->queries_occupied[vcb->buffer_index]++; 2579 vkCmdWriteTimestamp2(vcp->buffers[vcb->buffer_index], stage, vcp->query_pool, 2580 vcb->buffer_index * MaxCommandBufferTimestamps + query_index); 2581 } 2582 } 2583 } 2584 2585 DEBUG_IMPORT void 2586 vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value) 2587 { 2588 if (ValidVulkanHandle(command) && Between(timeline, 0, VulkanTimeline_Count - 1)) { 2589 VulkanContext *vk = vulkan_context; 2590 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2591 2592 u32 wait_index = vk->queue_indices[timeline]; 2593 vcb->in_flight_wait_values[wait_index] = Max(value, vcb->in_flight_wait_values[wait_index]); 2594 } 2595 } 2596 2597 DEBUG_IMPORT u64 2598 vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore) 2599 { 2600 u64 result = -1; 2601 if ValidVulkanHandle(command) { 2602 
VulkanContext *vk = vulkan_context; 2603 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2604 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2605 VulkanQueue *vq = vk->queues[vcb->timeline]; 2606 VulkanSemaphore *vs = &vq->timeline_semaphore; 2607 2608 vkEndCommandBuffer(vcp->buffers[vcb->buffer_index]); 2609 2610 DeferLoop(take_lock(&vq->lock, -1), release_lock(&vq->lock)) { 2611 VkCommandBufferSubmitInfo command_buffer_submit_info = { 2612 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, 2613 .commandBuffer = vcp->buffers[vcb->buffer_index], 2614 }; 2615 2616 result = ++vs->value; 2617 2618 u32 signal_submit_info_count = 1; 2619 VkSemaphoreSubmitInfo signal_submit_infos[2] = {{ 2620 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2621 .semaphore = vs->semaphore, 2622 .value = result, 2623 .stageMask = vq->pipeline_stage_flags, 2624 }}; 2625 2626 if ValidVulkanHandle(finished_semaphore) { 2627 VulkanSemaphore *fs = vk_entity_data(finished_semaphore, VulkanEntityKind_Semaphore); 2628 signal_submit_infos[signal_submit_info_count++] = (VkSemaphoreSubmitInfo){ 2629 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2630 .semaphore = fs->semaphore, 2631 .stageMask = vq->pipeline_stage_flags, 2632 }; 2633 } 2634 2635 u32 wait_submit_info_count = 0; 2636 VkSemaphoreSubmitInfo wait_submit_infos[VulkanQueueKind_Count + 1]; 2637 for (u32 i = 0; i < vk->unique_queues; i++) { 2638 u32 queue_index = vk->queue_indices[i]; 2639 if (vcb->in_flight_wait_values[queue_index] > 0) { 2640 VulkanQueue *q = vk->queues[queue_index]; 2641 VkSemaphoreSubmitInfo wait_ssi = { 2642 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2643 .semaphore = q->timeline_semaphore.semaphore, 2644 .value = vcb->in_flight_wait_values[queue_index], 2645 .stageMask = q->pipeline_stage_flags, 2646 }; 2647 wait_submit_infos[wait_submit_info_count++] = wait_ssi; 2648 } 2649 } 2650 2651 if ValidVulkanHandle(wait_semaphore) { 2652 VulkanSemaphore *ws = 
vk_entity_data(wait_semaphore, VulkanEntityKind_Semaphore); 2653 wait_submit_infos[wait_submit_info_count++] = (VkSemaphoreSubmitInfo){ 2654 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2655 .semaphore = ws->semaphore, 2656 .stageMask = vq->pipeline_stage_flags, 2657 }; 2658 } 2659 2660 VkSubmitInfo2 submit_info = { 2661 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, 2662 .commandBufferInfoCount = 1, 2663 .pCommandBufferInfos = &command_buffer_submit_info, 2664 .waitSemaphoreInfoCount = wait_submit_info_count, 2665 .pWaitSemaphoreInfos = wait_submit_infos, 2666 .signalSemaphoreInfoCount = signal_submit_info_count, 2667 .pSignalSemaphoreInfos = signal_submit_infos, 2668 }; 2669 2670 vkQueueSubmit2(vq->queue, 1, &submit_info, 0); 2671 2672 vcp->bound_pipeline = 0; 2673 2674 atomic_store_u64(vcp->submission_values + vcb->buffer_index, result); 2675 } 2676 2677 release_lock(&vcp->lock); 2678 2679 vk_entity_release((VulkanEntity *)command.value[0]); 2680 } 2681 return result; 2682 } 2683 2684 DEBUG_IMPORT void 2685 vk_command_begin_rendering(VulkanHandle command, GPUImage *colour, GPUImage *depth, GPUImage *resolve) 2686 { 2687 if ValidVulkanHandle(command) { 2688 VkCommandBuffer cmd = vk_command_buffer(command); 2689 2690 assert((colour->width == depth->width) && (colour->height == depth->height)); 2691 2692 VulkanImage *ci = vk_entity_data(colour->image, VulkanEntityKind_Image); 2693 VulkanImage *di = vk_entity_data(depth->image, VulkanEntityKind_Image); 2694 VulkanImage *ri = 0; 2695 if (resolve) ri = vk_entity_data(resolve->image, VulkanEntityKind_Image); 2696 2697 // NOTE: Layout Transitions 2698 { 2699 u32 image_memory_barrier_count = 2; 2700 VkImageMemoryBarrier2 image_memory_barriers[3] = { 2701 { 2702 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 2703 .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, 2704 .srcAccessMask = 0, 2705 .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, 2706 .dstAccessMask = 
VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, 2707 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2708 .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2709 .image = ci->image, 2710 .subresourceRange = { 2711 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 2712 .baseMipLevel = 0, 2713 .levelCount = 1, 2714 .baseArrayLayer = 0, 2715 .layerCount = 1, 2716 }, 2717 }, 2718 { 2719 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 2720 .srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, 2721 .srcAccessMask = 0, 2722 .dstStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, 2723 .dstAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 2724 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2725 .newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 2726 .image = di->image, 2727 .subresourceRange = { 2728 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT, 2729 .baseMipLevel = 0, 2730 .levelCount = 1, 2731 .baseArrayLayer = 0, 2732 .layerCount = 1, 2733 }, 2734 }, 2735 }; 2736 2737 if (resolve) image_memory_barriers[image_memory_barrier_count++] = (VkImageMemoryBarrier2){ 2738 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 2739 .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, 2740 .srcAccessMask = 0, 2741 .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT|VK_PIPELINE_STAGE_2_RESOLVE_BIT, 2742 .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, 2743 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2744 .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2745 .image = ri->image, 2746 .subresourceRange = { 2747 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 2748 .baseMipLevel = 0, 2749 .levelCount = 1, 2750 .baseArrayLayer = 0, 2751 .layerCount = 1, 2752 }, 2753 }; 2754 2755 VkDependencyInfo dependency_info = { 2756 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, 2757 
.imageMemoryBarrierCount = image_memory_barrier_count, 2758 .pImageMemoryBarriers = image_memory_barriers, 2759 }; 2760 2761 vkCmdPipelineBarrier2(cmd, &dependency_info); 2762 } 2763 2764 VkRenderingAttachmentInfo colour_attachment = { 2765 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, 2766 .imageView = ci->view, 2767 .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2768 .resolveMode = ri ? VK_RESOLVE_MODE_AVERAGE_BIT : 0, 2769 .resolveImageView = ri ? ri->view : 0, 2770 .resolveImageLayout = ri ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : 0, 2771 .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, 2772 .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 2773 .clearValue = {.color = {{0.0f, 0.0f, 0.0f, 0.0f}}}, 2774 }; 2775 2776 VkRenderingAttachmentInfo depth_stencil_attachment = { 2777 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, 2778 .imageView = di->view, 2779 .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 2780 .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, 2781 .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 2782 .clearValue = {.depthStencil = {1.0f, 0}}, 2783 }; 2784 2785 VkRenderingInfo rendering_info = { 2786 .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, 2787 .renderArea = {.offset = {0}, .extent = {colour->width, colour->height}}, 2788 .layerCount = 1, 2789 .colorAttachmentCount = 1, 2790 .pColorAttachments = &colour_attachment, 2791 .pDepthAttachment = &depth_stencil_attachment, 2792 .pStencilAttachment = &depth_stencil_attachment, 2793 }; 2794 2795 vkCmdBeginRendering(cmd, &rendering_info); 2796 } 2797 } 2798 2799 DEBUG_IMPORT void 2800 vk_command_draw(VulkanHandle command, GPUBuffer *model) 2801 { 2802 if (ValidVulkanHandle(command) && ValidVulkanHandle(model->handle)) { 2803 VkCommandBuffer cmd = vk_command_buffer(command); 2804 VulkanBuffer *vb = vk_entity_data(model->handle, VulkanEntityKind_RenderModel); 2805 vkCmdBindIndexBuffer2(cmd, vb->buffer, 0, vk_index_size(vb->index_type) * model->index_count, vb->index_type); 2806 vkCmdDrawIndexed(cmd, 
model->index_count, 1, 0, 0, 0); 2807 } 2808 } 2809 2810 DEBUG_IMPORT void 2811 vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset) 2812 { 2813 if ValidVulkanHandle(command) { 2814 VkCommandBuffer cmd = vk_command_buffer(command); 2815 VkRect2D scissor = {.offset = {x_offset, y_offset}, .extent = {width, height}}; 2816 vkCmdSetScissor(cmd, 0, 1, &scissor); 2817 } 2818 } 2819 2820 DEBUG_IMPORT void 2821 vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth) 2822 { 2823 if ValidVulkanHandle(command) { 2824 VkCommandBuffer cmd = vk_command_buffer(command); 2825 VkViewport viewport = {x_offset, y_offset, width, height, min_depth, max_depth}; 2826 vkCmdSetViewport(cmd, 0, 1, &viewport); 2827 } 2828 } 2829 2830 DEBUG_IMPORT void 2831 vk_command_end_rendering(VulkanHandle command) 2832 { 2833 if ValidVulkanHandle(command) vkCmdEndRendering(vk_command_buffer(command)); 2834 } 2835 2836 DEBUG_IMPORT void 2837 vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination, 2838 GPUBuffer *restrict source, u64 source_offset, i64 size) 2839 { 2840 if (ValidVulkanHandle(command) && ValidVulkanHandle(destination->handle) && ValidVulkanHandle(source->handle)) { 2841 VkCommandBuffer cmd = vk_command_buffer(command); 2842 VulkanBuffer *db = vk_entity_data(destination->handle, VulkanEntityKind_Buffer); 2843 VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer); 2844 2845 VkBufferCopy2 buffer_copy = { 2846 .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2, 2847 .srcOffset = source_offset, 2848 .dstOffset = 0, 2849 .size = size, 2850 }; 2851 2852 VkCopyBufferInfo2 copy_buffer_info = { 2853 .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2, 2854 .srcBuffer = sb->buffer, 2855 .dstBuffer = db->buffer, 2856 .regionCount = 1, 2857 .pRegions = &buffer_copy, 2858 }; 2859 2860 vkCmdCopyBuffer2(cmd, ©_buffer_info); 2861 } 2862 } 2863 2864 DEBUG_IMPORT u64 * 2865 
vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena)
{
	// NOTE: reads back the timestamp queries of one command buffer slot of `timeline`.
	//
	// Result layout (allocated from `arena`): result[0] holds the number of timestamps,
	// result[1 .. count] hold the 64 bit timestamp values.
	//
	// * valid timeline, slot has no occupied queries: returns 0 and allocates nothing.
	// * timeline out of range: returns a 1 element array straight from push_array()
	//   (presumably zeroed, i.e. a count of 0 - confirm against push_array).
	//
	// The command pool lock is held for the whole read, including the host side wait
	// for the slot's submission to complete.
	u64 *result = 0;
	if Between(timeline, 0, VulkanTimeline_Count - 1) {
		VulkanContext     *vk  = vulkan_context;
		VulkanCommandPool *vcp = vk->command_pools[timeline];
		DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) {
			// NOTE: slot preceding next_index; presumably the most recently acquired buffer.
			// NOTE(review): when next_index is 0 the u32 subtraction wraps around and the
			// modulo only selects the last slot if countof(vcp->buffers) is a power of
			// two - confirm how next_index is maintained.
			u32 index = (vcp->next_index - 1) % countof(vcp->buffers);
			u32 count = vcp->queries_occupied[index];
			if (count > 0) {
				result    = push_array(arena, u64, count + 1);
				result[0] = count;

				// NOTE: make sure the submission that wrote the queries has finished
				vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);

				// NOTE: each slot owns MaxCommandBufferTimestamps consecutive queries.
				// VK_QUERY_RESULT_64_BIT is required for results to be written as 64 bit
				// values; without it Vulkan writes 32 bit values which would not match
				// the u64 destination array and its sizeof(u64) stride.
				u32 first_query = index * MaxCommandBufferTimestamps;
				vkGetQueryPoolResults(vk->device, vcp->query_pool, first_query, count,
				                      count * sizeof(u64), result + 1, sizeof(u64),
				                      VK_QUERY_RESULT_64_BIT|VK_QUERY_RESULT_WAIT_BIT);
			}
		}
	} else {
		result = push_array(arena, u64, 1);
	}
	return result;
}