ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

vulkan.c (101975B)


      1 /* See LICENSE for license details. */
      2 // TODO(rnp)
      3 // [ ]: what is needed for HDR? I think it makes sense to just default to it nowadays
      4 // [ ]: once opengl is removed switch images to SRGB and/or 16 bit Float
      5 // [ ]: VK_KHR_robustness2 probably shouldn't be required but it also might not matter
      6 
      7 #include "beamformer_internal.h"
      8 #include "vulkan.h"
      9 #include "external/glslang/glslang/Include/glslang_c_interface.h"
     10 
// NOTE(review): presumably a nonzero value makes every queue kind share one
// device queue; the code consuming this is not visible here — confirm.
#define ForceSingleQueue (0)

// NOTE: prepend a subsystem tag to a string literal and wrap the result in s8()
#define glslang_info(s) s8("[glslang] " s)
#define vulkan_info(s)  s8("[vulkan]  " s)

// NOTE: a handle counts as valid when its first value slot is nonzero
#define ValidVulkanHandle(h) ((h).value[0] != 0)

// NOTE: sizes the per command pool arrays (command buffers, submission values, queries)
#define MaxCommandBuffersInFlight  BeamformerMaxRawDataFramesInFlight
#define MaxCommandBufferTimestamps (1024)
     20 
     21 typedef enum {
     22 	VulkanQueueKind_Graphics,
     23 	VulkanQueueKind_Compute,
     24 	VulkanQueueKind_Transfer,
     25 	VulkanQueueKind_Count,
     26 } VulkanQueueKind;
     27 
     28 typedef enum {
     29 	VulkanMemoryKind_Device,
     30 	VulkanMemoryKind_BAR,
     31 	VulkanMemoryKind_Host,
     32 	VulkanMemoryKind_Count,
     33 } VulkanMemoryKind;
     34 
     35 typedef struct {
     36 	VkDeviceMemory    memory;
     37 	VkBuffer          buffer;
     38 	u64               memory_size;
     39 
     40 	void *            host_pointer;
     41 
     42 	VulkanMemoryKind  memory_kind;
     43 
     44 	// NOTE: only used when the buffer is backing a VulkanRenderModel.
     45 	VkIndexType       index_type;
     46 } VulkanBuffer;
     47 
     48 typedef struct {
     49 	VkDeviceMemory    memory;
     50 	VkImage           image;
     51 	VkImageView       view;
     52 } VulkanImage;
     53 
     54 typedef struct {
     55 	VkPipeline         pipeline;
     56 	VkPipelineLayout   layout;
     57 	VkShaderStageFlags stage_flags;
     58 } VulkanPipeline;
     59 
     60 typedef struct {
     61 	VkSemaphore semaphore;
     62 	u64         value;
     63 } VulkanSemaphore;
     64 
     65 typedef struct {
     66 	VulkanTimeline timeline;
     67 	u32            buffer_index;
     68 
     69 	// NOTE(rnp): since there may not be QueueKind_Count queues, when putting values into this
     70 	// array you must be careful to map through the queue_indices array in the vulkan_context.
     71 	u64 in_flight_wait_values[VulkanQueueKind_Count];
     72 } VulkanCommandBuffer;
     73 
     74 typedef enum {
     75 	VulkanEntityKind_Buffer,
     76 	VulkanEntityKind_CommandBuffer,
     77 	VulkanEntityKind_Image,
     78 	VulkanEntityKind_Pipeline,
     79 	VulkanEntityKind_RenderModel,
     80 	VulkanEntityKind_Semaphore,
     81 } VulkanEntityKind;
     82 
     83 typedef struct VulkanEntity VulkanEntity;
     84 struct VulkanEntity {
     85 	VulkanEntity *   next;
     86 	VulkanEntityKind kind;
     87 	union {
     88 		VulkanBuffer        buffer;
     89 		VulkanCommandBuffer command_buffer;
     90 		VulkanImage         image;
     91 		VulkanPipeline      pipeline;
     92 		VulkanSemaphore     semaphore;
     93 	} as;
     94 };
     95 
     96 typedef alignas(64) struct {
     97 	i32 lock;
     98 
     99 	u16     queue_family;
    100 	u16     queue_index;
    101 	VkQueue queue;
    102 
    103 	VulkanSemaphore timeline_semaphore;
    104 
    105 	VkPipelineStageFlags2 pipeline_stage_flags;
    106 } VulkanQueue;
    107 static_assert(alignof(VulkanQueue) == 64, "VulkanQueue must be placed on its own cacheline");
    108 
    109 typedef alignas(64) struct {
    110 	i32             lock;
    111 	u32             next_index;
    112 
    113 	VulkanPipeline *bound_pipeline;
    114 
    115 	VkCommandPool   handle;
    116 	VkQueryPool     query_pool;
    117 	VkCommandBuffer buffers[MaxCommandBuffersInFlight];
    118 
    119 	u64             submission_values[MaxCommandBuffersInFlight];
    120 	u32             queries_occupied[MaxCommandBuffersInFlight];
    121 } VulkanCommandPool;
    122 
    123 typedef struct {
    124 	Arena             arena;
    125 	i32               arena_lock;
    126 
    127 	VkInstance        handle;
    128 	VkDevice          device;
    129 	VkPhysicalDevice  physical_device;
    130 
    131 	VkDescriptorPool       descriptor_pool;
    132 	VkDescriptorSetLayout  descriptor_set_layouts[BeamformerShaderResourceKind_Count];
    133 	VkDescriptorSet        descriptor_sets[BeamformerShaderResourceKind_Count];
    134 	// NOTE(rnp): must store these if we want to allow partial updates easily
    135 	VkDescriptorBufferInfo descriptor_buffer_infos[BeamformerShaderBufferSlot_Count];
    136 
    137 	// NOTE(rnp): fallback for when a shader fails to compile
    138 	VulkanPipeline    default_compute_pipeline;
    139 	VulkanPipeline    default_graphics_pipeline;
    140 
    141 	GPUInfo           gpu_info;
    142 
    143 	struct {
    144 		u64             max_allocation_size;
    145 		u64             non_coherent_atom_size;
    146 		u8              gpu_heap_index;
    147 		i8              memory_type_indices[VulkanMemoryKind_Count];
    148 		b8              memory_host_coherent[VulkanMemoryKind_Count];
    149 		static_assert(VK_MAX_MEMORY_HEAPS < I8_MAX, "");
    150 		static_assert(VK_MAX_MEMORY_TYPES < U8_MAX, "");
    151 	} memory_info;
    152 
    153 	VulkanCommandPool * command_pools[VulkanTimeline_Count];
    154 	VulkanQueue *       queues[VulkanQueueKind_Count];
    155 	// NOTE(rnp): there are a few places in the code where simply going through the queues map
    156 	// is not sufficient. those places need to know of the unique queues which unique queue
    157 	// is being referred to. that code uses this map instead.
    158 	u16               queue_indices[VulkanQueueKind_Count];
    159 	u16               unique_queues;
    160 
    161 	VkFormat          swap_chain_image_format;
    162 	VkFormat          depth_stencil_format;
    163 
    164 	VulkanEntity *    entity_freelist;
    165 	Arena             entity_arena;
    166 	i32               entity_lock;
    167 } VulkanContext;
    168 
// NOTE: no instance extensions are unconditionally required at the moment
read_only global const char *vk_required_instance_extensions[] = {
};

// NOTE: platform specific halves of the external memory/semaphore extensions.
// Each list macro is terminated by the blank line following the last `\`.
#if OS_WINDOWS
#define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \
	X("VK_KHR_external_memory_win32") \
	X("VK_KHR_external_semaphore_win32") \

#else
#define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \
	X("VK_KHR_external_memory_fd") \
	X("VK_KHR_external_semaphore_fd") \

#endif

// NOTE: device extensions listed as required; the platform specific entries
// are appended at the end of the list.
#define VK_REQUIRED_DEVICE_EXTENSIONS_LIST \
	X("VK_KHR_16bit_storage") \
	X("VK_KHR_external_memory") \
	X("VK_KHR_external_semaphore") \
	X("VK_KHR_robustness2") \
	X("VK_KHR_storage_buffer_storage_class") \
	X("VK_KHR_timeline_semaphore") \
	VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST

// NOTE: expand the list into an array of s8 string constants
#define X(str) s8_comp(str),
read_only global s8 vk_required_device_extensions[] = {VK_REQUIRED_DEVICE_EXTENSIONS_LIST};
#undef X

// NOTE: optional extensions are written as (prefix, name) pairs so the same
// list can also generate one flag per extension in vulkan_config.optional.
#define VK_OPTIONAL_DEVICE_EXTENSIONS_LIST \
	X(VK_KHR, cooperative_matrix) \

// NOTE: stringify and join the pair, e.g. "VK_KHR_cooperative_matrix"
#define X(p, s, ...) s8_comp(#p "_" #s),
read_only global s8 vk_optional_device_extensions[] = {VK_OPTIONAL_DEVICE_EXTENSIONS_LIST};
#undef X
    203 
// NOTE: X-macro lists of device feature member names, grouped by the Vulkan
// version whose feature structure declares them.
// NOTE(review): the expansion sites are not visible here; presumably they check
// and enable each listed feature during device creation — confirm.
#define VK_REQUIRED_PHYSICAL_FEATURES \
	X(shaderInt16) \
	X(shaderInt64) \

#define VK_REQUIRED_PHYSICAL_11_FEATURES \
	X(storageBuffer16BitAccess) \

#define VK_REQUIRED_PHYSICAL_12_FEATURES \
	X(bufferDeviceAddress) \
	X(shaderFloat16) \
	X(timelineSemaphore) \
	X(vulkanMemoryModel) \

#define VK_REQUIRED_PHYSICAL_13_FEATURES \
	X(dynamicRendering) \
	X(synchronization2) \

    220 
// NOTE: device extensions used for shader debug info; glsl_to_spirv only emits
// non-semantic debug info when both flags in vulkan_config.debug are set.
#define VK_DEBUG_EXTENSIONS \
	X(VK_KHR, shader_non_semantic_info) \
	X(VK_KHR, shader_relaxed_extended_instruction) \

#define X(p, s, ...) s8_comp(#p "_" #s),
read_only global s8 vk_debug_extensions[] = {VK_DEBUG_EXTENSIONS};
#undef X

// NOTE: instance level debug extensions; debug_utils gates object labelling
#define VK_INSTANCE_DEBUG_EXTENSIONS_LIST \
	X(VK_EXT, debug_utils) \

#define X(p, s, ...) s8_comp(#p "_" #s),
read_only global s8 vk_instance_debug_extensions[] = {VK_INSTANCE_DEBUG_EXTENSIONS_LIST};
#undef X

// NOTE: the validation layer list is empty outside of debug builds
#if BEAMFORMER_DEBUG
#define VK_VALIDATION_LAYERS_LIST \
	X(KHRONOS, validation) \

#else
#define VK_VALIDATION_LAYERS_LIST
#endif

// NOTE: entries expand to "VK_LAYER_<vendor>_<name>".
// NOTE(review): this table uses str8/str8_comp while the extension tables use
// s8/s8_comp — confirm that the mix is intended.
read_only global str8 vk_validation_layers[] = {
	#define X(vendor, name, ...) str8_comp("VK_LAYER_" #vendor "_" #name),
	VK_VALIDATION_LAYERS_LIST
	#undef X
};
    249 
/* NOTE: runtime configuration of the backend. Each union overlays one named
 * flag per list entry (generated from the X-macro list) with an array `E` of
 * the same flags, so the flags can be addressed either by name or by the
 * entry's index in the matching vk_*_extensions / vk_validation_layers table. */
global struct {
	u32 driver_api_version;
	// NOTE: one flag per entry of vk_optional_device_extensions
	union {
		struct {
			#define X(_, name, ...) b8 name;
			VK_OPTIONAL_DEVICE_EXTENSIONS_LIST
			#undef X
		};
		b8 E[countof(vk_optional_device_extensions)];
	} optional;

	// NOTE: one flag per entry of vk_debug_extensions
	union {
		struct {
			#define X(_, name, ...) b8 name;
			VK_DEBUG_EXTENSIONS
			#undef X
		};
		b8 E[countof(vk_debug_extensions)];
	} debug;

	// NOTE: one flag per entry of vk_instance_debug_extensions
	union {
		struct {
			#define X(_, name, ...) b8 name;
			VK_INSTANCE_DEBUG_EXTENSIONS_LIST
			#undef X
		};
		b8 E[countof(vk_instance_debug_extensions)];
	} instance;

	#if BEAMFORMER_DEBUG
	// NOTE: per validation layer state; only present in debug builds
	struct {
		union {
			struct {
				#define X(_, name, ...) b8 name;
				VK_VALIDATION_LAYERS_LIST
				#undef X
			};
			b8 E[countof(vk_validation_layers)];
		} enabled;

		union {
			struct {
				#define X(_, name, ...) u32 name;
				VK_VALIDATION_LAYERS_LIST
				#undef X
			};
			u32 E[countof(vk_validation_layers)];
		} version;
	} layers;
	#endif
} vulkan_config;
    301 
// NOTE: upper bound on the number of device extensions that can be enabled at
// once: every required, optional and debug device extension.
#define MAX_ENABLED_EXTENSIONS (  countof(vk_required_device_extensions) \
                                + countof(vk_optional_device_extensions) \
                                + countof(vk_debug_extensions) \
                               )

// NOTE: declared as a one element array so it can be used with pointer syntax
// (vulkan_context->field) throughout the file
global VulkanContext vulkan_context[1];
    308 
/* NOTE(rnp): the idea here is to set reasonable development constraints.
 * They should probably not match one to one with the maximums of the dev
 * machine's hardware. Instead these are here to cause compile time failure
 * for features which are not expected to work everywhere. */
global glslang_resource_t glslc_resource_constraints[1] = {{
	// NOTE: compute limits chosen for this project
	.max_compute_work_group_count_x = 65535,
	.max_compute_work_group_count_y = 65535,
	.max_compute_work_group_count_z = 65535,
	.max_compute_work_group_size_x  = 1024,
	.max_compute_work_group_size_y  = 1024,
	.max_compute_work_group_size_z  = 1024,

	// NOTE: taken from glslang defaults
	.max_lights = 32,
	.max_clip_planes = 6,
	.max_texture_units = 32,
	.max_texture_coords = 32,
	.max_vertex_attribs = 64,
	.max_vertex_uniform_components = 4096,
	.max_varying_floats = 64,
	.max_vertex_texture_image_units = 32,
	.max_combined_texture_image_units = 80,
	.max_texture_image_units = 32,
	.max_fragment_uniform_components = 4096,
	.max_draw_buffers = 32,
	.max_vertex_uniform_vectors = 128,
	.max_varying_vectors = 8,
	.max_fragment_uniform_vectors = 16,
	.max_vertex_output_vectors = 16,
	.max_fragment_input_vectors = 15,
	.min_program_texel_offset = -8,
	.max_program_texel_offset = 7,
	.max_clip_distances = 8,
	.max_compute_uniform_components = 1024,
	.max_compute_texture_image_units = 16,
	.max_compute_image_uniforms = 8,
	.max_compute_atomic_counters = 8,
	.max_compute_atomic_counter_buffers = 1,
	.max_varying_components = 60,
	.max_vertex_output_components = 64,
	.max_fragment_input_components = 128,
	.max_image_units = 8,
	.max_combined_image_units_and_fragment_outputs = 8,
	.max_combined_shader_output_resources = 8,
	.max_image_samples = 0,
	.max_vertex_image_uniforms = 0,
	.max_fragment_image_uniforms = 8,
	.max_combined_image_uniforms = 8,
	.max_viewports = 16,
	.max_vertex_atomic_counters = 0,
	.max_fragment_atomic_counters = 8,
	.max_combined_atomic_counters = 8,
	.max_atomic_counter_bindings = 1,
	.max_vertex_atomic_counter_buffers = 0,
	.max_fragment_atomic_counter_buffers = 1,
	.max_combined_atomic_counter_buffers = 1,
	.max_atomic_counter_buffer_size = 16384,
	.max_transform_feedback_buffers = 4,
	.max_transform_feedback_interleaved_components = 64,
	.max_cull_distances = 8,
	.max_combined_clip_and_cull_distances = 8,
	.max_samples = 4,
	.max_mesh_output_vertices_ext = 256,
	.max_mesh_output_primitives_ext = 256,
	.max_mesh_work_group_size_x_ext = 128,
	.max_mesh_work_group_size_y_ext = 128,
	.max_mesh_work_group_size_z_ext = 128,
	.max_task_work_group_size_x_ext = 128,
	.max_task_work_group_size_y_ext = 128,
	.max_task_work_group_size_z_ext = 128,
	.max_mesh_view_count_ext = 4,
	.max_dual_source_draw_buffers_ext = 1,

	// NOTE: every language limit flag is switched on
	.limits = {
		.non_inductive_for_loops                  = 1,
		.while_loops                              = 1,
		.do_while_loops                           = 1,
		.general_uniform_indexing                 = 1,
		.general_attribute_matrix_vector_indexing = 1,
		.general_varying_indexing                 = 1,
		.general_sampler_indexing                 = 1,
		.general_variable_indexing                = 1,
		.general_constant_matrix_vector_indexing  = 1,
	},
}};
    394 
    395 #if BEAMFORMER_RENDERDOC_HOOKS
    396 DEBUG_IMPORT void *
    397 vk_renderdoc_instance_handle(void)
    398 {
    399 	return *((void **)vulkan_context->handle);
    400 }
    401 #endif
    402 
    403 #if BEAMFORMER_DEBUG
    404 #define vk_label_object(k, h, label, extra) vk_label_object_(VK_OBJECT_TYPE_##k, (u64)h, label, extra)
    405 function void
    406 vk_label_object_(VkObjectType kind, u64 handle, s8 label, s8 extra)
    407 {
    408 	local_persist u8 buffer[1024];
    409 	Stream sb = arena_stream(arena_from_memory(buffer, sizeof(buffer)));
    410 	if (vulkan_config.instance.debug_utils && label.len > 0) {
    411 		stream_append_s8s(&sb, label, s8(" ("), extra, s8(")"));
    412 		stream_append_byte(&sb, 0);
    413 		if (!sb.errors) {
    414 			VkDebugUtilsObjectNameInfoEXT object_name_info = {
    415 				.sType        = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
    416 				.objectType   = kind,
    417 				.objectHandle = handle,
    418 				.pObjectName  = (char *)sb.data,
    419 			};
    420 			vkSetDebugUtilsObjectNameEXT(vulkan_context->device, &object_name_info);
    421 		}
    422 	}
    423 }
    424 #else
    425 #define vk_label_object(...)
    426 #define vk_label_object_(...)
    427 #endif
    428 
    429 function VulkanEntity *
    430 vk_entity_allocate(VulkanEntityKind kind)
    431 {
    432 	VulkanEntity *result = 0;
    433 	DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock))
    434 	{
    435 		result = SLLPopFreelist(vulkan_context->entity_freelist);
    436 		if (!result) result = push_array_no_zero(&vulkan_context->entity_arena, VulkanEntity, 1);
    437 	}
    438 
    439 	zero_struct(result);
    440 	result->kind = kind;
    441 	return result;
    442 }
    443 
    444 function void
    445 vk_entity_release(VulkanEntity *entity)
    446 {
    447 	DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock))
    448 	{
    449 		SLLStackPush(vulkan_context->entity_freelist, entity, next);
    450 	}
    451 }
    452 
    453 function void *
    454 vk_entity_data(VulkanHandle h, VulkanEntityKind kind)
    455 {
    456 	VulkanEntity *e = (VulkanEntity *)h.value[0];
    457 	assert(ValidVulkanHandle(h) && e->kind == kind);
    458 	return &e->as;
    459 }
    460 
    461 function VkCommandBuffer
    462 vk_command_buffer(VulkanHandle h)
    463 {
    464 	VulkanCommandBuffer *vcb = vk_entity_data(h, VulkanEntityKind_CommandBuffer);
    465 	VulkanCommandPool   *vcp = vulkan_context->command_pools[vcb->timeline];
    466 	VkCommandBuffer result = vcp->buffers[vcb->buffer_index];
    467 	return result;
    468 }
    469 
    470 #define glslang_log(a, ...) glslang_log_(a, arg_list(s8, __VA_ARGS__))
    471 function void
    472 glslang_log_(Arena arena, s8 *items, uz count)
    473 {
    474 	Stream sb = arena_stream(arena);
    475 	stream_append_s8(&sb, glslang_info(""));
    476 	stream_append_s8s_(&sb, items, count);
    477 	if (sb.data[sb.widx - 1] != '\n') stream_append_byte(&sb, '\n');
    478 	os_console_log(sb.data, sb.widx);
    479 }
    480 
    481 function s8
    482 glsl_to_spirv(Arena *arena, u32 kind, s8 shader_text, s8 name)
    483 {
    484 	/* NOTE(rnp): glslang's garbage c interface doesn't expose internal usage of strings with length */
    485 	assert(shader_text.data[shader_text.len] == 0);
    486 
    487 	glslang_input_t input = {
    488 		.language                          = GLSLANG_SOURCE_GLSL,
    489 		.stage                             = kind,
    490 		.client                            = GLSLANG_CLIENT_VULKAN,
    491 		.client_version                    = GLSLANG_TARGET_VULKAN_1_4,
    492 		.target_language                   = GLSLANG_TARGET_SPV,
    493 		.target_language_version           = GLSLANG_TARGET_SPV_1_6,
    494 		.code                              = (c8 *)shader_text.data,
    495 		.default_version                   = 460,
    496 		.default_profile                   = GLSLANG_NO_PROFILE,
    497 		.force_default_version_and_profile = 0,
    498 		.forward_compatible                = 0,
    499 		.messages                          = GLSLANG_MSG_DEFAULT_BIT,
    500 		.resource                          = glslc_resource_constraints,
    501 	};
    502 	glslang_shader_t *shader = glslang_shader_create(&input);
    503 
    504 	s8 error = {0};
    505 	if (glslang_shader_preprocess(shader, &input)) {
    506 		if (!glslang_shader_parse(shader, &input))
    507 			error = s8("parsing failed");
    508 	} else {
    509 		error = s8("preprocessing failed");
    510 	}
    511 
    512 	if (error.len) {
    513 		glslang_log(*arena, name, s8(": "), error, s8("\n"),
    514 		            c_str_to_s8((c8 *)glslang_shader_get_info_log(shader)),
    515 		            c_str_to_s8((c8 *)glslang_shader_get_info_debug_log(shader)));
    516 		glslang_shader_delete(shader);
    517 		shader = 0;
    518 	}
    519 
    520 	s8 result = {0};
    521 	if (shader) {
    522 		glslang_program_t *program = glslang_program_create();
    523 		glslang_program_add_shader(program, shader);
    524 		i32 messages = GLSLANG_MSG_DEBUG_INFO_BIT|GLSLANG_MSG_SPV_RULES_BIT|GLSLANG_MSG_VULKAN_RULES_BIT;
    525 		if (glslang_program_link(program, messages)) {
    526 			glslang_spv_options_t options = {.validate = 1,};
    527 
    528 			if (vulkan_config.debug.shader_non_semantic_info &&
    529 			    vulkan_config.debug.shader_relaxed_extended_instruction)
    530 			{
    531 				options.generate_debug_info                  = 1;
    532 				options.emit_nonsemantic_shader_debug_info   = 1;
    533 				options.emit_nonsemantic_shader_debug_source = 1;
    534 			}
    535 
    536 			glslang_program_add_source_text(program, kind, (c8 *)shader_text.data, shader_text.len);
    537 			glslang_program_SPIRV_generate_with_options(program, kind, &options);
    538 
    539 			u32 words   = glslang_program_SPIRV_get_size(program);
    540 			result.data = (u8 *)push_array(arena, u32, words);
    541 			result.len  = words * sizeof(u32);
    542 			glslang_program_SPIRV_get(program, (u32 *)result.data);
    543 
    544 			s8 spirv_msg = c_str_to_s8((c8 *)glslang_program_SPIRV_get_messages(program));
    545 			if (spirv_msg.len) glslang_log(*arena, name, s8(": spirv info: "), spirv_msg);
    546 		} else {
    547 			glslang_log(*arena, name, s8(": shader linking failed\n"),
    548 			            c_str_to_s8((c8 *)glslang_program_get_info_log(program)),
    549 			            c_str_to_s8((c8 *)glslang_program_get_info_debug_log(program)));
    550 		}
    551 		glslang_shader_delete(shader);
    552 		glslang_program_delete(program);
    553 	}
    554 
    555 	return result;
    556 }
    557 
    558 function u32
    559 vk_shader_kind_to_glslang_shader_kind(u32 kind)
    560 {
    561 	u32 result = ctz_u64(kind);
    562 	return result;
    563 }
    564 
    565 function VkShaderModule
    566 vk_compile_shader_module(Arena arena, u32 kind, s8 text, s8 name)
    567 {
    568 	VkShaderModule result = {0};
    569 	s8 spirv = glsl_to_spirv(&arena, vk_shader_kind_to_glslang_shader_kind(kind), text, name);
    570 	VkShaderModuleCreateInfo create_info = {
    571 		.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
    572 		.codeSize = (uz)spirv.len,
    573 		.pCode    = (u32 *)spirv.data,
    574 	};
    575 	if (spirv.len > 0) vkCreateShaderModule(vulkan_context->device, &create_info, 0, &result);
    576 
    577 	return result;
    578 }
    579 
    580 function VkShaderStageFlags
    581 vk_stage_flags_from_shader_kind(VulkanShaderKind kind)
    582 {
    583 	read_only local_persist VkShaderStageFlags map[VulkanShaderKind_Count + 1] = {
    584 		[VulkanShaderKind_Vertex]   = VK_SHADER_STAGE_VERTEX_BIT,
    585 		[VulkanShaderKind_Mesh]     = VK_SHADER_STAGE_MESH_BIT_EXT,
    586 		[VulkanShaderKind_Fragment] = VK_SHADER_STAGE_FRAGMENT_BIT,
    587 		[VulkanShaderKind_Compute]  = VK_SHADER_STAGE_COMPUTE_BIT,
    588 		[VulkanShaderKind_Count]    = 0,
    589 	};
    590 	VkShaderStageFlags result = map[Clamp((u32)kind, 0, VulkanShaderKind_Count)];
    591 	return result;
    592 }
    593 
    594 function VulkanPipeline
    595 vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_constants_size)
    596 {
    597 	VulkanPipeline result = {.stage_flags = VK_SHADER_STAGE_COMPUTE_BIT};
    598 	VkShaderModule module = vk_compile_shader_module(arena, VK_SHADER_STAGE_COMPUTE_BIT, text, name);
    599 	if (module) {
    600 		VkPushConstantRange push_constant_range = {
    601 			.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
    602 			.offset     = 0,
    603 			.size       = push_constants_size,
    604 		};
    605 
    606 		VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
    607 			.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    608 			.setLayoutCount         = countof(vulkan_context->descriptor_set_layouts),
    609 			.pSetLayouts            = vulkan_context->descriptor_set_layouts,
    610 			.pushConstantRangeCount = push_constants_size ? 1 : 0,
    611 			.pPushConstantRanges    = push_constants_size ? &push_constant_range : 0,
    612 		};
    613 
    614 		vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_create_info, 0, &result.layout);
    615 
    616 		VkComputePipelineCreateInfo pipeline_create_info = {
    617 			.sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
    618 			.layout = result.layout,
    619 			.stage  = {
    620 				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    621 				.stage  = VK_SHADER_STAGE_COMPUTE_BIT,
    622 				.module = module,
    623 				.pName  = "main",
    624 			},
    625 		};
    626 
    627 		vkCreateComputePipelines(vulkan_context->device, 0, 1, &pipeline_create_info, 0, &result.pipeline);
    628 
    629 		vk_label_object(PIPELINE,        result.pipeline, name, s8("Pipeline"));
    630 		vk_label_object(PIPELINE_LAYOUT, result.layout,   name, s8("Pipeline Layout"));
    631 		vk_label_object(SHADER_MODULE,   module,          name, s8("Module"));
    632 
    633 		vkDestroyShaderModule(vulkan_context->device, module, 0);
    634 	}
    635 	if (result.pipeline == 0) result = vulkan_context->default_compute_pipeline;
    636 
    637 	return result;
    638 }
    639 
    640 function VulkanPipeline
    641 vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size)
    642 {
    643 	assume(count == 2);
    644 
    645 	VulkanPipeline result = {0};
    646 	VkShaderModule modules[2];
    647 
    648 	modules[0] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[0].kind),
    649 	                                      infos[0].text, infos[0].name);
    650 	modules[1] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[1].kind),
    651 	                                      infos[1].text, infos[1].name);
    652 	if (modules[0] && modules[1]) {
    653 		result.stage_flags = vk_stage_flags_from_shader_kind(infos[0].kind)
    654 		                     | vk_stage_flags_from_shader_kind(infos[1].kind);
    655 
    656 		VkPushConstantRange pcr = {
    657 			.stageFlags = result.stage_flags,
    658 			.offset     = 0,
    659 			.size       = push_constants_size,
    660 		};
    661 
    662 		VkPipelineLayoutCreateInfo pipeline_layout_info = {
    663 			.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    664 			.setLayoutCount         = countof(vulkan_context->descriptor_set_layouts),
    665 			.pSetLayouts            = vulkan_context->descriptor_set_layouts,
    666 			.pushConstantRangeCount = push_constants_size ? 1    : 0,
    667 			.pPushConstantRanges    = push_constants_size ? &pcr : 0,
    668 		};
    669 
    670 		vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_info, 0, &result.layout);
    671 
    672 		VkPipelineShaderStageCreateInfo shader_stage_create_infos[2] = {
    673 			{
    674 				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    675 				.stage  = vk_stage_flags_from_shader_kind(infos[0].kind),
    676 				.module = modules[0],
    677 				.pName  = "main",
    678 			},
    679 			{
    680 				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    681 				.stage  = vk_stage_flags_from_shader_kind(infos[1].kind),
    682 				.module = modules[1],
    683 				.pName  = "main",
    684 			},
    685 		};
    686 
    687 		VkPipelineVertexInputStateCreateInfo vertex_input_info = {
    688 			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
    689 		};
    690 
    691 		VkPipelineInputAssemblyStateCreateInfo input_assembly_info = {
    692 			.sType    = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
    693 			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
    694 		};
    695 
    696 		VkPipelineViewportStateCreateInfo viewport_info = {
    697 			.sType         = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
    698 			.viewportCount = 1,
    699 			.scissorCount  = 1,
    700 		};
    701 
    702 		VkPipelineRasterizationStateCreateInfo rasterization_info = {
    703 			.sType       = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
    704 			.polygonMode = VK_POLYGON_MODE_FILL,
    705 			.lineWidth   = 1.0f,
    706 			.cullMode    = VK_CULL_MODE_BACK_BIT,
    707 			.frontFace   = VK_FRONT_FACE_CLOCKWISE,
    708 		};
    709 
    710 		VkPipelineMultisampleStateCreateInfo multisampling_info = {
    711 			.sType                = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
    712 			.rasterizationSamples = vulkan_context->gpu_info.max_msaa_samples,
    713 		};
    714 
    715 		VkPipelineDepthStencilStateCreateInfo depth_test_create_info = {
    716 			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
    717 			.depthTestEnable       = 1,
    718 			.depthWriteEnable      = 1,
    719 			.depthCompareOp        = VK_COMPARE_OP_LESS,
    720 			.depthBoundsTestEnable = 1,
    721 			.stencilTestEnable     = 0,
    722 			.front                 = {0},
    723 			.back                  = {0},
    724 			.minDepthBounds        = 0.0f,
    725 			.maxDepthBounds        = 1.0f,
    726 		};
    727 
    728 		u32 colour_mask = VK_COLOR_COMPONENT_R_BIT|VK_COLOR_COMPONENT_G_BIT|VK_COLOR_COMPONENT_B_BIT|VK_COLOR_COMPONENT_A_BIT;
    729 		VkPipelineColorBlendAttachmentState blend_state = {
    730 			.colorWriteMask      = colour_mask,
    731 			.blendEnable         = 1,
    732 			.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
    733 			.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
    734 			.colorBlendOp        = VK_BLEND_OP_ADD,
    735 			.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
    736 			.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
    737 			.alphaBlendOp        = VK_BLEND_OP_ADD,
    738 		};
    739 
    740 		VkPipelineColorBlendStateCreateInfo colour_blend_state_create = {
    741 			.sType           = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
    742 			.logicOpEnable   = 0,
    743 			.logicOp         = VK_LOGIC_OP_COPY,
    744 			.attachmentCount = 1,
    745 			.pAttachments    = &blend_state,
    746 		};
    747 
    748 		VkDynamicState dynamic_states[] = {
    749 			VK_DYNAMIC_STATE_VIEWPORT,
    750 			VK_DYNAMIC_STATE_SCISSOR,
    751 		};
    752 
    753 		VkPipelineDynamicStateCreateInfo dynamic_state_info = {
    754 			.sType             = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
    755 			.dynamicStateCount = countof(dynamic_states),
    756 			.pDynamicStates    = dynamic_states,
    757 		};
    758 
    759 		//VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_SRGB;
    760 		VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_UNORM;
    761 		VkPipelineRenderingCreateInfo rendering_create_info = {
    762 			.sType                   = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
    763 			.colorAttachmentCount    = 1,
    764 			.pColorAttachmentFormats = &colour_attachment_format,
    765 			.depthAttachmentFormat   = vulkan_context->depth_stencil_format,
    766 			.stencilAttachmentFormat = vulkan_context->depth_stencil_format,
    767 		};
    768 
    769 		VkGraphicsPipelineCreateInfo pci = {
    770 			.sType               = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
    771 			.pNext               = &rendering_create_info,
    772 			.stageCount          = countof(shader_stage_create_infos),
    773 			.pStages             = shader_stage_create_infos,
    774 			.pVertexInputState   = &vertex_input_info,
    775 			.pInputAssemblyState = &input_assembly_info,
    776 			.pViewportState      = &viewport_info,
    777 			.pRasterizationState = &rasterization_info,
    778 			.pMultisampleState   = &multisampling_info,
    779 			.pDepthStencilState  = &depth_test_create_info,
    780 			.pColorBlendState    = &colour_blend_state_create,
    781 			.pDynamicState       = &dynamic_state_info,
    782 			.layout              = result.layout,
    783 		};
    784 
    785 		vkCreateGraphicsPipelines(vulkan_context->device, 0, 1, &pci,0, &result.pipeline);
    786 
    787 		s8 extras[] = {
    788 			[VulkanShaderKind_Vertex]   = s8_comp("Vertex Module"),
    789 			[VulkanShaderKind_Mesh]     = s8_comp("Mesh Module"),
    790 			[VulkanShaderKind_Fragment] = s8_comp("Fragment Module"),
    791 		};
    792 		assert(infos[0].kind < countof(extras));
    793 		assert(infos[1].kind < countof(extras));
    794 
    795 		vk_label_object(PIPELINE,        result.pipeline, infos[0].name, s8("Pipeline"));
    796 		vk_label_object(PIPELINE_LAYOUT, result.layout,   infos[0].name, s8("Pipeline Layout"));
    797 		//vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[0], infos[0].name, extras[infos[0].kind]);
    798 		//vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[1], infos[1].name, extras[infos[1].kind]);
    799 	}
    800 
    801 	if (modules[0]) vkDestroyShaderModule(vulkan_context->device, modules[0], 0);
    802 	if (modules[1]) vkDestroyShaderModule(vulkan_context->device, modules[1], 0);
    803 
    804 	if (result.pipeline == 0) result = vulkan_context->default_graphics_pipeline;
    805 
    806 	return result;
    807 }
    808 
    809 function VulkanSemaphore
    810 vk_make_semaphore(OSHandle *export)
    811 {
    812 	VulkanContext *vk = vulkan_context;
    813 
    814 	VkSemaphoreCreateInfo       sci  = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
    815 	VkExportSemaphoreCreateInfo esci = {
    816 		.sType       = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
    817 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
    818 		                          : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    819 	};
    820 	VkSemaphoreTypeCreateInfo stc = {
    821 		.sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
    822 		.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
    823 	};
    824 
    825 	if (export) sci.pNext = &esci;
    826 	else        sci.pNext = &stc;
    827 
    828 	VulkanSemaphore result = {0};
    829 
    830 	vkCreateSemaphore(vk->device, &sci, 0, &result.semaphore);
    831 
    832 	if (export) {
    833 		if (OS_WINDOWS) {
    834 			VkSemaphoreGetWin32HandleInfoKHR ghi = {
    835 				.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
    836 				.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT,
    837 				.semaphore  = result.semaphore,
    838 			};
    839 			void *handle;
    840 			vkGetSemaphoreWin32HandleKHR(vk->device, &ghi, &handle);
    841 			export->value[0] = (u64)handle;
    842 		} else {
    843 			VkSemaphoreGetFdInfoKHR ghi = {
    844 				.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
    845 				.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    846 				.semaphore  = result.semaphore,
    847 			};
    848 			i32 handle;
    849 			vkGetSemaphoreFdKHR(vk->device, &ghi, &handle);
    850 			export->value[0] = (u64)handle;
    851 		}
    852 	}
    853 
    854 	return result;
    855 }
    856 
    857 function void
    858 vk_release_memory(VkDeviceMemory memory, u64 size)
    859 {
    860 	VulkanContext *vk = vulkan_context;
    861 	vkFreeMemory(vk->device, memory, 0);
    862 	atomic_add_u64(&vk->gpu_info.gpu_heap_used, -size);
    863 }
    864 
    865 function b32
    866 vk_allocate_memory(VkDeviceMemory *memory, u64 size, VulkanMemoryKind kind, VkMemoryAllocateFlags flags,
    867                    VkMemoryDedicatedAllocateInfo *dedicated_allocate_info, OSHandle *export)
    868 {
    869 	VulkanContext *vk = vulkan_context;
    870 
    871 	VkExportMemoryAllocateInfo export_info = {
    872 		.sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
    873 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
    874 		                          : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
    875 	};
    876 
    877 	VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {
    878 		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
    879 		.flags = flags,
    880 		.pNext = dedicated_allocate_info,
    881 	};
    882 
    883 	if (export) {
    884 		export_info.pNext = dedicated_allocate_info;
    885 		memory_allocate_flags_info.pNext = &export_info;
    886 	}
    887 
    888 	VkMemoryAllocateInfo memory_allocate_info = {
    889 		.sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
    890 		.allocationSize  = size,
    891 		.memoryTypeIndex = vk->memory_info.memory_type_indices[kind],
    892 		.pNext           = &memory_allocate_flags_info,
    893 	};
    894 
    895 	b32 result = vkAllocateMemory(vk->device, &memory_allocate_info, 0, memory) == VK_SUCCESS;
    896 	if (result) {
    897 		atomic_add_u64(&vk->gpu_info.gpu_heap_used, memory_allocate_info.allocationSize);
    898 
    899 		if (export) {
    900 			if (OS_WINDOWS) {
    901 				VkMemoryGetWin32HandleInfoKHR handle_info = {
    902 					.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
    903 					.memory     = *memory,
    904 					.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
    905 				};
    906 				void *handle;
    907 				vkGetMemoryWin32HandleKHR(vk->device, &handle_info, &handle);
    908 				export->value[0] = (u64)handle;
    909 			} else {
    910 				VkMemoryGetFdInfoKHR fd_info = {
    911 					.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
    912 					.memory     = *memory,
    913 					.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
    914 				};
    915 				i32 fd;
    916 				vkGetMemoryFdKHR(vk->device, &fd_info, &fd);
    917 				export->value[0] = (u64)fd;
    918 			}
    919 		}
    920 	}
    921 	return result;
    922 }
    923 
    924 function u32
    925 vk_index_size(VkIndexType type)
    926 {
    927 	u32 result = 0;
    928 	switch (type) {
    929 	case VK_INDEX_TYPE_UINT16:{ result = 2; }break;
    930 	case VK_INDEX_TYPE_UINT32:{ result = 4; }break;
    931 	InvalidDefaultCase;
    932 	}
    933 	return result;
    934 }
    935 
/* Parameters consumed by vk_buffer_allocate_common() when creating a buffer. */
typedef struct {
	// NOTE: output; receives the final allocation size and the buffer's device address
	GPUBuffer        *gpu_buffer;
	// NOTE: requested size in bytes; clamped against the device allocation limit
	u64               size;
	// NOTE: VulkanUsageFlag_* bits; transfer source/destination add the matching
	// buffer usage bits, HostReadWrite selects BAR memory and maps the allocation
	VulkanUsageFlags  flags;
	// NOTE: more than one queue family selects concurrent sharing mode
	u32               queue_family_count;
	u32               queue_family_indices[VulkanTimeline_Count];
	// NOTE: anything other than VK_INDEX_TYPE_NONE_KHR adds index buffer usage
	VkIndexType       index_type;
	// NOTE: name attached to the buffer and its memory via vk_label_object()
	s8                label;
} VulkanBufferAllocateInfo;
    945 
    946 function b32
    947 vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai)
    948 {
    949 	VulkanContext *vk = vulkan_context;
    950 
    951 	// TODO(rnp): this probably should be handled, its usually 4GB. likely
    952 	// need to chain multiple allocations and handle it in shader code
    953 	u64 clamp_size = vk->memory_info.max_allocation_size & ~(vk->memory_info.non_coherent_atom_size - 1);
    954 
    955 	// NOTE(rnp): renderdoc can't handle buffers that are too close to the allocation size limit
    956 	if (renderdoc_attached())
    957 		clamp_size -= MB(8);
    958 
    959 	u64 size = Min(ai->size, clamp_size);
    960 
    961 	VkBufferCreateInfo buffer_create_info = {
    962 		.sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
    963 		.usage       = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT|VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
    964 		.size        = size,
    965 		.sharingMode = ai->queue_family_count > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE,
    966 		.queueFamilyIndexCount = ai->queue_family_count,
    967 		.pQueueFamilyIndices   = ai->queue_family_indices,
    968 	};
    969 
    970 	if (ai->flags & VulkanUsageFlag_TransferSource)
    971 		buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
    972 
    973 	if (ai->flags & VulkanUsageFlag_TransferDestination)
    974 		buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    975 
    976 	if (ai->index_type != VK_INDEX_TYPE_NONE_KHR)
    977 		buffer_create_info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
    978 
    979 	vkCreateBuffer(vk->device, &buffer_create_info, 0, &vb->buffer);
    980 	vk_label_object(BUFFER, vb->buffer, ai->label, s8("Buffer"));
    981 
    982 	VkMemoryRequirements memory_requirements;
    983 	vkGetBufferMemoryRequirements(vk->device, vb->buffer, &memory_requirements);
    984 
    985 	assert((u64)size <= memory_requirements.size);
    986 	size = memory_requirements.size;
    987 
    988 	VkMemoryDedicatedAllocateInfo dedicated_allocate_info = {
    989 		.sType  = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
    990 		.buffer = vb->buffer,
    991 	};
    992 
    993 	/* NOTE(rnp): to create a CPU writable buffer:
    994 	 * 1. try to allocate and map the entire buffer
    995 	 *    - this may fail if the buffer is bigger than the BAR size
    996 	 *      (unknowable from vulkan), or the memory space has become
    997 	 *      too fragmented (unlikely)
    998 	 * 2. if allocation or mapping fails we must chain a host buffer
    999 	 *    for staging. If this happens in practice we should add
   1000 	 *    the ability to import an existing external allocation
   1001 	 */
   1002 	b32 host_read_write = (ai->flags & VulkanUsageFlag_HostReadWrite) != 0;
   1003 	vb->memory_kind = host_read_write ? VulkanMemoryKind_BAR : VulkanMemoryKind_Device;
   1004 
   1005 	b32 result = 0;
   1006 	// TODO(rnp): this may fail if the allocation is too big for the BAR size
   1007 	// it needs to handled properly
   1008 	if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, 0)) {
   1009 		result  = 1;
   1010 		ai->gpu_buffer->size = size;
   1011 		vb->memory_size = size;
   1012 
   1013 		vb->index_type = ai->index_type;
   1014 
   1015 		vk_label_object(DEVICE_MEMORY, vb->memory, ai->label, s8("Memory"));
   1016 
   1017 		if (host_read_write)
   1018 			vkMapMemory(vk->device, vb->memory, 0, size, 0, &vb->host_pointer);
   1019 
   1020 		vkBindBufferMemory(vk->device, vb->buffer, vb->memory, 0);
   1021 		VkBufferDeviceAddressInfo buffer_device_address_info = {
   1022 			.sType  = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
   1023 			.buffer = vb->buffer,
   1024 		};
   1025 		ai->gpu_buffer->gpu_pointer = vkGetBufferDeviceAddress(vk->device, &buffer_device_address_info);
   1026 	}
   1027 	return result;
   1028 }
   1029 
   1030 function void
   1031 vk_load_instance(Arena arena, Stream *err)
   1032 {
   1033 	#define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(0, #name);
   1034 	VkBaseProcedureList
   1035 	#undef X
   1036 
   1037 	u32 enabled_validation_layers_count = 0;
   1038 	const char *enabled_validation_layers[countof(vk_validation_layers)];
   1039 
   1040 	u32 enabled_instance_extensions_count = 0;
   1041 	const char *enabled_instance_extensions[countof(vk_required_instance_extensions) + countof(vk_instance_debug_extensions)];
   1042 
   1043 	static_assert(countof(vk_required_instance_extensions) == 0, "");
   1044 	//for EachElement(vk_required_instance_extensions, it)
   1045 	//	enabled_instance_extensions[enabled_instance_extensions_count++] = vk_required_instance_extensions[it];
   1046 
   1047 	#if BEAMFORMER_DEBUG
   1048 	{
   1049 		u32 layer_count = 0;
   1050 		vkEnumerateInstanceLayerProperties(&layer_count, 0);
   1051 
   1052 		VkLayerProperties *layers      = push_array(&arena, VkLayerProperties, layer_count);
   1053 		str8              *layer_str8s = push_array(&arena, str8,              layer_count);
   1054 		vkEnumerateInstanceLayerProperties(&layer_count, layers);
   1055 
   1056 		for (u32 i = 0; i < layer_count; i++)
   1057 			layer_str8s[i] = str8_from_c_str(layers[i].layerName);
   1058 
   1059 		for EachElement(vk_validation_layers, it) {
   1060 			for(u32 i = 0; i < layer_count; i++) {
   1061 				if (str8_equal(vk_validation_layers[it], layer_str8s[i])) {
   1062 					u32 index = enabled_validation_layers_count++;
   1063 					enabled_validation_layers[index]   = (char *)vk_validation_layers[it].data;
   1064 					vulkan_config.layers.enabled.E[it] = 1;
   1065 					vulkan_config.layers.version.E[it] = layers[i].specVersion;
   1066 					break;
   1067 				}
   1068 			}
   1069 		}
   1070 
   1071 		if (countof(vk_validation_layers) != enabled_validation_layers_count) {
   1072 			i32 missing_count = countof(vk_validation_layers) - enabled_validation_layers_count;
   1073 			stream_append_s8s(err, vulkan_info("missing validation layer"),
   1074 			                  missing_count > 1 ? s8("s:") : s8(":"), s8("\n"));
   1075 
   1076 			for EachElement(vk_validation_layers, it)
   1077 				if (vulkan_config.layers.enabled.E[it] == 0)
   1078 					stream_append_s8s(err, s8("    "), s8_from_str8(vk_validation_layers[it]), s8("\n"));
   1079 		}
   1080 
   1081 		u32 instance_extension_count = 0;
   1082 		vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, 0);
   1083 
   1084 		VkExtensionProperties *instance_extensions = push_array(&arena, VkExtensionProperties, instance_extension_count);
   1085 		s8                    *instance_ext_s8s    = push_array(&arena, s8,                    instance_extension_count);
   1086 		vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, instance_extensions);
   1087 		for EachIndex(instance_extension_count, it)
   1088 			instance_ext_s8s[it] = c_str_to_s8(instance_extensions[it].extensionName);
   1089 
   1090 		for EachElement(vk_instance_debug_extensions, it) {
   1091 			for EachIndex(instance_extension_count, i) {
   1092 				if (s8_equal(vk_instance_debug_extensions[it], instance_ext_s8s[i])) {
   1093 					u32 index = enabled_instance_extensions_count++;
   1094 					enabled_instance_extensions[index] = (char *)vk_instance_debug_extensions[it].data;
   1095 					vulkan_config.instance.E[it] = 1;
   1096 					break;
   1097 				}
   1098 			}
   1099 		}
   1100 	}
   1101 	#endif
   1102 
   1103 	VkApplicationInfo app_info = {
   1104 		.sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
   1105 		.pApplicationName   = BEAMFORMER_NAME_STRING,
   1106 		.applicationVersion = 0,
   1107 		.pEngineName        = "No Engine",
   1108 		.engineVersion      = 0,
   1109 		.apiVersion         = VK_MAKE_API_VERSION(1, 3, 0, 0),
   1110 	};
   1111 
   1112 	VkInstanceCreateInfo instance_create_info = {
   1113 		.sType                   = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
   1114 		.pApplicationInfo        = &app_info,
   1115 		.ppEnabledExtensionNames = enabled_instance_extensions,
   1116 		.enabledExtensionCount   = enabled_instance_extensions_count,
   1117 		.ppEnabledLayerNames     = enabled_validation_layers,
   1118 		.enabledLayerCount       = enabled_validation_layers_count,
   1119 	};
   1120 
   1121 	#if 0 && BEAMFORMER_DEBUG
   1122 	VkValidationFeatureEnableEXT validation_feature_enables[] = {
   1123 		VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
   1124 		VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT,
   1125 		VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT,
   1126 		VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
   1127 	};
   1128 
   1129 	VkValidationFeaturesEXT validation_features = {
   1130 		.sType                         = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
   1131 		.enabledValidationFeatureCount = countof(validation_feature_enables),
   1132 		.pEnabledValidationFeatures    = validation_feature_enables,
   1133 	};
   1134 
   1135 	instance_create_info.pNext = &validation_features;
   1136 	#endif
   1137 
   1138 	vkCreateInstance(&instance_create_info, 0, &vulkan_context->handle);
   1139 
   1140 	#define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(vulkan_context->handle, #name);
   1141 	VkInstanceProcedureList
   1142 	#undef X
   1143 }
   1144 
   1145 function void
   1146 vk_load_physical_device(Arena arena, Stream *err)
   1147 {
   1148 	VulkanContext *vk = vulkan_context;
   1149 
   1150 	u32 device_count;
   1151 	vkEnumeratePhysicalDevices(vk->handle, &device_count, 0);
   1152 
   1153 	VkPhysicalDevice *devices = push_array(&arena, typeof(*devices), device_count);
   1154 	vkEnumeratePhysicalDevices(vk->handle, &device_count, devices);
   1155 
   1156 	i32 best_index = -1, best_score = -1;
   1157 	for (u32 i = 0; i < device_count; i++) {
   1158 		Arena scratch = arena;
   1159 		VkPhysicalDeviceProperties2 *dp = push_struct(&scratch, typeof(*dp));
   1160 		dp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
   1161 		vkGetPhysicalDeviceProperties2(devices[i], dp);
   1162 
   1163 		i32 score = 0;
   1164 		if (dp->properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)
   1165 			score++;
   1166 
   1167 		if (score > best_score) {
   1168 			best_score = score;
   1169 			best_index = (i32)i;
   1170 		}
   1171 	}
   1172 
   1173 	vk->physical_device = best_index >= 0 ? devices[best_index] : 0;
   1174 	if (!vk->physical_device)
   1175 		fatal(vulkan_info("failed to find a suitable GPU\n"));
   1176 
   1177 	VkPhysicalDeviceProperties2        dp   = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
   1178 	VkPhysicalDeviceVulkan11Properties v11p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES};
   1179 	dp.pNext = &v11p;
   1180 
   1181 	vkGetPhysicalDeviceProperties2(vk->physical_device, &dp);
   1182 
   1183 	stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp.properties.deviceName), s8("\n"));
   1184 	stream_append_s8(err, vulkan_info("Vulkan Version: "));
   1185 	{
   1186 		u32 dv = dp.properties.apiVersion;
   1187 		stream_appendf(err, "%u.%u.%u\n", VK_API_VERSION_MAJOR(dv), VK_API_VERSION_MINOR(dv), VK_API_VERSION_PATCH(dv));
   1188 	}
   1189 
   1190 	{
   1191 		Arena scratch = arena;
   1192 		u32 extension_count = 0;
   1193 		vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, 0);
   1194 		VkExtensionProperties *extensions = push_array(&scratch, VkExtensionProperties, extension_count);
   1195 		vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, extensions);
   1196 
   1197 		s8 *ext_str8s = push_array(&scratch, s8, extension_count);
   1198 		for (u32 index = 0; index < extension_count; index++)
   1199 			ext_str8s[index] = c_str_to_s8(extensions[index].extensionName);
   1200 
   1201 		b8 *supported = push_array(&scratch, b8, countof(vk_required_device_extensions));
   1202 		for EachIndex(extension_count, index)
   1203 			for EachElement(vk_required_device_extensions, it)
   1204 				supported[it] |= s8_equal(vk_required_device_extensions[it], ext_str8s[index]);
   1205 
   1206 		u32 supported_count = 0;
   1207 		for EachElement(vk_required_device_extensions, it)
   1208 			supported_count += supported[it];
   1209 
   1210 		u32 missing_count = countof(vk_required_device_extensions) - supported_count;
   1211 		if (missing_count) {
   1212 			stream_append_s8s(err, vulkan_info("fatal error: missing required device extension"),
   1213 			                  missing_count > 1 ? s8("s") : s8(""), s8(":\n"));
   1214 			for EachElement(vk_required_device_extensions, it) {
   1215 				if (!supported[it]) {
   1216 					s8 name = vk_required_device_extensions[it];
   1217 					stream_append_s8s(err, vulkan_info("    "), name, s8("\n"));
   1218 				}
   1219 			}
   1220 			fatal(stream_to_s8(err));
   1221 		}
   1222 
   1223 		for EachIndex(extension_count, index)
   1224 			for EachElement(vk_optional_device_extensions, it)
   1225 				vulkan_config.optional.E[it] |= s8_equal(vk_optional_device_extensions[it], ext_str8s[index]);
   1226 
   1227 		#if BEAMFORMER_DEBUG
   1228 		for EachIndex(extension_count, index)
   1229 			for EachElement(vk_debug_extensions, it)
   1230 				vulkan_config.debug.E[it] |= s8_equal(vk_debug_extensions[it], ext_str8s[index]);
   1231 		#endif
   1232 	}
   1233 
   1234 	{
   1235 		VkPhysicalDeviceFeatures2        df   = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
   1236 		VkPhysicalDeviceVulkan11Features v11f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES};
   1237 		VkPhysicalDeviceVulkan12Features v12f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
   1238 		VkPhysicalDeviceVulkan13Features v13f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES};
   1239 		df.pNext   = &v11f;
   1240 		v11f.pNext = &v12f;
   1241 		v12f.pNext = &v13f;
   1242 		vkGetPhysicalDeviceFeatures2(vk->physical_device, &df);
   1243 
   1244 		{
   1245 			b32 all_supported = 1;
   1246 			#define X(name, ...) all_supported &= df.features.name;
   1247 			VK_REQUIRED_PHYSICAL_FEATURES
   1248 			#undef X
   1249 
   1250 			if (!all_supported) {
   1251 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1252 				#define X(name, ...) if (!df.features.name) stream_append_s8(err, s8("    " #name "\n"));
   1253 				VK_REQUIRED_PHYSICAL_FEATURES
   1254 				#undef X
   1255 				fatal(stream_to_s8(err));
   1256 			}
   1257 		}
   1258 
   1259 		{
   1260 			b32 all_supported = 1;
   1261 			#define X(name, ...) all_supported &= v11f.name;
   1262 			VK_REQUIRED_PHYSICAL_11_FEATURES
   1263 			#undef X
   1264 
   1265 			if (!all_supported) {
   1266 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1267 				#define X(name, ...) if (!v11f.name) stream_append_s8(err, s8("    " #name "\n"));
   1268 				VK_REQUIRED_PHYSICAL_11_FEATURES
   1269 				#undef X
   1270 				fatal(stream_to_s8(err));
   1271 			}
   1272 		}
   1273 
   1274 		{
   1275 			b32 all_supported = 1;
   1276 			#define X(name, ...) all_supported &= v12f.name;
   1277 			VK_REQUIRED_PHYSICAL_12_FEATURES
   1278 			#undef X
   1279 
   1280 			if (!all_supported) {
   1281 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1282 				#define X(name, ...) if (!v12f.name) stream_append_s8(err, s8("    " #name "\n"));
   1283 				VK_REQUIRED_PHYSICAL_12_FEATURES
   1284 				#undef X
   1285 				fatal(stream_to_s8(err));
   1286 			}
   1287 		}
   1288 
   1289 		{
   1290 			b32 all_supported = 1;
   1291 			#define X(name, ...) all_supported &= v13f.name;
   1292 			VK_REQUIRED_PHYSICAL_13_FEATURES
   1293 			#undef X
   1294 
   1295 			if (!all_supported) {
   1296 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1297 				#define X(name, ...) if (!v13f.name) stream_append_s8(err, s8("    " #name "\n"));
   1298 				VK_REQUIRED_PHYSICAL_13_FEATURES
   1299 				#undef X
   1300 				fatal(stream_to_s8(err));
   1301 			}
   1302 		}
   1303 
   1304 		if (vulkan_config.optional.cooperative_matrix) {
   1305 			Arena scratch = arena;
   1306 			u32 property_count = 0;
   1307 			vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, 0);
   1308 
   1309 			VkCooperativeMatrixPropertiesKHR *mat = push_array(&scratch, VkCooperativeMatrixPropertiesKHR, property_count);
   1310 
   1311 			// NOTE(rnp): validation layer stupidity
   1312 			for EachIndex(property_count, it)
   1313 				mat[it].sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
   1314 
   1315 			vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, mat);
   1316 			b32 supported = 0;
   1317 			// TODO(rnp): for now the requirements are hardcoded, it is possible to support a couple
   1318 			// variations if needed.
   1319 			for EachIndex(property_count, it) {
   1320 				b32 match = 1;
   1321 				supported &= mat[it].scope == VK_SCOPE_SUBGROUP_KHR;
   1322 
   1323 				supported &= mat[it].MSize == 16;
   1324 				supported &= mat[it].NSize == 16;
   1325 				supported &= mat[it].KSize == 16;
   1326 
   1327 				supported &= mat[it].AType == VK_COMPONENT_TYPE_FLOAT16_KHR;
   1328 				supported &= mat[it].BType == VK_COMPONENT_TYPE_FLOAT16_KHR;
   1329 				supported &= mat[it].CType == VK_COMPONENT_TYPE_FLOAT32_KHR;
   1330 				supported &= mat[it].ResultType == VK_COMPONENT_TYPE_FLOAT32_KHR;
   1331 
   1332 				supported |= match;
   1333 			}
   1334 			vk->gpu_info.cooperative_matrix = supported;
   1335 		}
   1336 	}
   1337 
   1338 	VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2};
   1339 	vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, &mp);
   1340 
   1341 	VkPhysicalDeviceMemoryProperties *bmp = &mp.memoryProperties;
   1342 
   1343 	// NOTE(rnp): vulkan spec says that highest performance memory types must
   1344 	// come first. just take the first one found.
   1345 
   1346 	for (u32 i = 0; i < bmp->memoryHeapCount; i++) {
   1347 		if (bmp->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
   1348 			vk->memory_info.gpu_heap_index = i;
   1349 			break;
   1350 		}
   1351 	}
   1352 
   1353 	for (u32 i = 0; i < bmp->memoryTypeCount; i++) {
   1354 		if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
   1355 			assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index);
   1356 			vk->memory_info.memory_type_indices[VulkanMemoryKind_Device] = i;
   1357 			break;
   1358 		}
   1359 	}
   1360 
   1361 	// TODO(rnp): it is possible that this isn't available. for devices like that we would need
   1362 	// to copy into a staging buffer then DMA. For now that is unsupported.
   1363 	u32 bar_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT|VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
   1364 	i32 bar_index = -1;
   1365 	for (u32 i = 0; i < bmp->memoryTypeCount; i++) {
   1366 		if ((bmp->memoryTypes[i].propertyFlags & bar_flags) == bar_flags) {
   1367 			assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index);
   1368 			bar_index = (i32)i;
   1369 			break;
   1370 		}
   1371 	}
   1372 
   1373 	// TODO(rnp): this shouldn't be fatal
   1374 	if (bar_index == -1) {
   1375 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support host bar memory\n"));
   1376 		fatal(stream_to_s8(err));
   1377 	}
   1378 
   1379 	vk->memory_info.memory_type_indices[VulkanMemoryKind_BAR] = bar_index;
   1380 
   1381 	vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = -1;
   1382 	for (u32 i = 0; i < bmp->memoryTypeCount; i++) {
   1383 		if ((bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == 0) {
   1384 			if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
   1385 				vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = (i8)i;
   1386 				break;
   1387 			}
   1388 		}
   1389 	}
   1390 
   1391 	if (vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] == -1) {
   1392 		stream_append_s8(err, vulkan_info("fatal error: vulkan driver does not provide host visible memory\n"));
   1393 		fatal(stream_to_s8(err));
   1394 	}
   1395 
   1396 	for EachElement(vk->memory_info.memory_type_indices, it) {
   1397 		u32 ti    = vk->memory_info.memory_type_indices[it];
   1398 		u32 flags = bmp->memoryTypes[ti].propertyFlags;
   1399 		vk->memory_info.memory_host_coherent[it] = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
   1400 	}
   1401 
   1402 	vulkan_config.driver_api_version       = dp.properties.apiVersion;
   1403 	vk->memory_info.max_allocation_size    = v11p.maxMemoryAllocationSize;
   1404 	vk->memory_info.non_coherent_atom_size = dp.properties.limits.nonCoherentAtomSize;
   1405 	vk->gpu_info.vendor                    = dp.properties.vendorID;
   1406 	vk->gpu_info.gpu_heap_size             = bmp->memoryHeaps[vk->memory_info.gpu_heap_index].size;
   1407 	vk->gpu_info.timestamp_period_ns       = dp.properties.limits.timestampPeriod;
   1408 	vk->gpu_info.max_image_dimension_2D    = dp.properties.limits.maxImageDimension2D;
   1409 	vk->gpu_info.max_image_dimension_3D    = dp.properties.limits.maxImageDimension3D;
   1410 	vk->gpu_info.max_msaa_samples          = round_down_power_of_two(dp.properties.limits.framebufferColorSampleCounts);
   1411 	vk->gpu_info.subgroup_size             = v11p.subgroupSize;
   1412 	vk->gpu_info.max_compute_shared_memory_size = dp.properties.limits.maxComputeSharedMemorySize;
   1413 
   1414 	// IMPORTANT(rnp): memory must only be pushed at the end of the function
   1415 	vk->gpu_info.name = push_s8(&vk->arena, c_str_to_s8(dp.properties.deviceName));
   1416 
   1417 	#if BEAMFORMER_DEBUG
   1418 	{
   1419 		b32 mismatch = 0;
   1420 		for EachElement(vk_validation_layers, it) {
   1421 			u32 lv = vulkan_config.layers.version.E[it];
   1422 			u32 dv = vulkan_config.driver_api_version;
   1423 			if (lv < dv) {
   1424 				mismatch = 1;
   1425 				stream_append_s8s(err, vulkan_info("warning: validaton layer \""),
   1426 				                  s8_from_str8(vk_validation_layers[it]), s8("\" version: "));
   1427 				stream_appendf(err, "%u.%u.%u", VK_API_VERSION_MAJOR(lv), VK_API_VERSION_MINOR(lv), VK_API_VERSION_PATCH(lv));
   1428 				stream_append_s8(err, s8(" lower than driver API version: "));
   1429 				stream_appendf(err, "%u.%u.%u\n", VK_API_VERSION_MAJOR(dv), VK_API_VERSION_MINOR(dv), VK_API_VERSION_PATCH(dv));
   1430 			}
   1431 		}
   1432 
   1433 		if (mismatch)
   1434 			stream_append_s8(err, vulkan_info("DO NOT report any bugs without updating your validation layers!\n"));
   1435 	}
   1436 	#endif
   1437 }
   1438 
   1439 function void
   1440 vk_load_queues(Arena *memory, Stream *err)
   1441 {
   1442 	///////////////////////////////////////////////////////
   1443 	// NOTE(rnp): try to allocate an appropriate queue for
   1444 	// each of the following tasks:
   1445 	//   * UI Rendering (Graphics)
   1446 	//   * Beamforming  (Compute)
   1447 	//   * Upload       (Transfer)
   1448 	// Then create a logical device ready for use
   1449 
   1450 	VulkanContext *vk = vulkan_context;
   1451 
   1452 	u32 queue_family_count;
   1453 	vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, 0);
   1454 
   1455 	TempArena arena_save = begin_temp_arena(memory);
   1456 	VkQueueFamilyProperties *queues = push_array(memory, typeof(*queues), queue_family_count);
   1457 	vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, queues);
   1458 
   1459 	i32 queue_indices[VulkanQueueKind_Count];
   1460 	for EachElement(queue_indices, it) queue_indices[it] = -1;
   1461 
   1462 	///////////////////////////////////////////////////////////////
   1463 	// NOTE(rnp): start by assigning queue families for each queue
   1464 
   1465 	/* NOTE(rnp): try for exclusive transfer queue */
   1466 	#if !ForceSingleQueue
   1467 	{
   1468 		u32 mask = VK_QUEUE_GRAPHICS_BIT|VK_QUEUE_COMPUTE_BIT|VK_QUEUE_TRANSFER_BIT;
   1469 		u32 max_timestamp_bits = 0;
   1470 		for (u32 index = 0; index < queue_family_count; index++) {
   1471 			if ((queues[index].queueFlags & mask) == VK_QUEUE_TRANSFER_BIT) {
   1472 				if (queues[index].timestampValidBits > max_timestamp_bits) {
   1473 					max_timestamp_bits = queues[index].timestampValidBits;
   1474 					queue_indices[VulkanQueueKind_Transfer] = (i32)index;
   1475 				}
   1476 			}
   1477 		}
   1478 	}
   1479 
   1480 	/* NOTE(rnp): try for compute separate from graphics */
   1481 	for (u32 index = 0; index < queue_family_count; index++) {
   1482 		if ((queues[index].queueFlags & VK_QUEUE_COMPUTE_BIT)  != 0 &&
   1483 		    (queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)
   1484 		{
   1485 			queue_indices[VulkanQueueKind_Compute] = (i32)index;
   1486 			break;
   1487 		}
   1488 	}
   1489 	#endif /* !ForceSingleQueue */
   1490 
   1491 	/* NOTE(rnp): find graphics family and verify it is exclusive */
   1492 	b32 multi_graphics = 0;
   1493 	for (u32 index = 0; index < queue_family_count; index++) {
   1494 		if ((queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) {
   1495 			// TODO(rnp): check for presentation support
   1496 			multi_graphics = queue_indices[VulkanQueueKind_Graphics] != -1;
   1497 			queue_indices[VulkanQueueKind_Graphics] = (i32)index;
   1498 		}
   1499 	}
   1500 
   1501 	if (multi_graphics)
   1502 		stream_append_s8(err, vulkan_info("warning: multiple queue families reported graphics support\n"));
   1503 
   1504 	if (queue_indices[VulkanQueueKind_Graphics] == -1) {
   1505 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support graphics presentation\n"));
   1506 		fatal(stream_to_s8(err));
   1507 	}
   1508 
   1509 	if (queue_indices[VulkanQueueKind_Compute] == -1)
   1510 		if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
   1511 			queue_indices[VulkanQueueKind_Compute] = queue_indices[VulkanQueueKind_Graphics];
   1512 
   1513 	if (queue_indices[VulkanQueueKind_Compute] == -1) {
   1514 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support compute\n"));
   1515 		fatal(stream_to_s8(err));
   1516 	}
   1517 
   1518 	if (queue_indices[VulkanQueueKind_Transfer] == -1) {
   1519 		if ((queues[queue_indices[VulkanQueueKind_Compute]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
   1520 			queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Compute];
   1521 		else if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
   1522 			queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Graphics];
   1523 	}
   1524 
   1525 	if (queue_indices[VulkanQueueKind_Transfer] == -1) {
   1526 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support data transfer\n"));
   1527 		fatal(stream_to_s8(err));
   1528 	}
   1529 
   1530 	/////////////////////////////////////////////////////////////////
   1531 	// NOTE(rnp): if queues share families try to allocate subqueues
   1532 
   1533 	u32 assigned_subindices[VulkanQueueKind_Count] = {0};
   1534 	i32 queue_subindices[VulkanQueueKind_Count]    = {0};
   1535 
   1536 	assigned_subindices[VulkanQueueKind_Graphics] += 1;
   1537 
   1538 	if (queue_indices[VulkanQueueKind_Compute] == queue_indices[VulkanQueueKind_Graphics]) {
   1539 		if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount)
   1540 			queue_subindices[VulkanQueueKind_Compute] = assigned_subindices[VulkanQueueKind_Graphics]++;
   1541 	} else {
   1542 		assigned_subindices[VulkanQueueKind_Compute] += 1;
   1543 	}
   1544 
   1545 	if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Graphics]) {
   1546 		if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount)
   1547 			queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Graphics]++;
   1548 	} else if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Compute]) {
   1549 		if (assigned_subindices[VulkanQueueKind_Compute] < queues[queue_indices[VulkanQueueKind_Compute]].queueCount)
   1550 			queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Compute]++;
   1551 	} else {
   1552 		assigned_subindices[VulkanQueueKind_Transfer] += 1;
   1553 	}
   1554 
   1555 	for EachElement(assigned_subindices, it)
   1556 		vk->unique_queues += assigned_subindices[it];
   1557 
   1558 	end_temp_arena(arena_save);
   1559 
   1560 	/////////////////////////////////////////////
   1561 	// NOTE(rnp): fill in info and create device
   1562 	for EachElement(vk->queues, it) {
   1563 		u32 index = queue_subindices[it];
   1564 		for (i32 i = 0; i < queue_indices[it]; i++)
   1565 			index += assigned_subindices[i];
   1566 		vk->queue_indices[it] = index;
   1567 	}
   1568 
   1569 	for EachElement(vk->queues, it) {
   1570 		if (vk->queues[vk->queue_indices[it]] == 0) {
   1571 			vk->queues[vk->queue_indices[it]] = push_struct(memory, VulkanQueue);
   1572 			vk->queues[vk->queue_indices[it]]->queue_family = queue_indices[it];
   1573 			vk->queues[vk->queue_indices[it]]->queue_index  = queue_subindices[it];
   1574 		}
   1575 		vk->queues[it] = vk->queues[vk->queue_indices[it]];
   1576 	}
   1577 
   1578 	for EachElement(vk->command_pools, it)
   1579 		vk->command_pools[it] = push_struct(memory, VulkanCommandPool);
   1580 
   1581 	VkDeviceQueueCreateInfo queue_create_infos[VulkanQueueKind_Count];
   1582 
   1583 	f32 queue_priorities[VulkanQueueKind_Count][VulkanQueueKind_Count];
   1584 	for (u32 i = 0; i < VulkanQueueKind_Count; i++)
   1585 		for (u32 j = 0; j < VulkanQueueKind_Count; j++)
   1586 			queue_priorities[i][j] = 1.0f;
   1587 	queue_priorities[queue_indices[VulkanQueueKind_Compute]][queue_subindices[VulkanQueueKind_Compute]] = 0.5f;
   1588 
   1589 	u32 queue_create_index = 0;
   1590 	b32 queue_info_filled[VulkanQueueKind_Count] = {0};
   1591 	for (u32 q = 0; q < vk->unique_queues; q++) {
   1592 		u32 base_q = queue_indices[q];
   1593 		if (!queue_info_filled[base_q]) {
   1594 			queue_create_infos[queue_create_index++] = (VkDeviceQueueCreateInfo){
   1595 				.sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
   1596 				.queueFamilyIndex = base_q,
   1597 				.queueCount       = assigned_subindices[q],
   1598 				.pQueuePriorities = queue_priorities[q],
   1599 			};
   1600 		}
   1601 		queue_info_filled[base_q] = 1;
   1602 	}
   1603 
   1604 	u32 enabled_count = 0;
   1605 	const char *enabled_extensions[MAX_ENABLED_EXTENSIONS];
   1606 
   1607 	for EachElement(vk_required_device_extensions, it)
   1608 		enabled_extensions[enabled_count++] = (char *)vk_required_device_extensions[it].data;
   1609 
   1610 	for EachElement(vk_optional_device_extensions, it)
   1611 		if (vulkan_config.optional.E[it])
   1612 			enabled_extensions[enabled_count++] = (char *)vk_optional_device_extensions[it].data;
   1613 
   1614 	for EachElement(vk_debug_extensions, it)
   1615 		if (vulkan_config.debug.E[it])
   1616 			enabled_extensions[enabled_count++] = (char *)vk_debug_extensions[it].data;
   1617 
   1618 	VkDeviceCreateInfo device_create_info = {
   1619 		.sType                   = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
   1620 		.pQueueCreateInfos       = queue_create_infos,
   1621 		.queueCreateInfoCount    = queue_create_index,
   1622 		.ppEnabledExtensionNames = enabled_extensions,
   1623 		.enabledExtensionCount   = enabled_count,
   1624 	};
   1625 
   1626 	VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR pdsre = {
   1627 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR,
   1628 		.shaderRelaxedExtendedInstruction = 1,
   1629 	};
   1630 	if (vulkan_config.debug.shader_relaxed_extended_instruction) {
   1631 		pdsre.pNext = (void *)device_create_info.pNext;
   1632 		device_create_info.pNext = &pdsre;
   1633 	}
   1634 
   1635 	VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_mat_features = {
   1636 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR,
   1637 		.cooperativeMatrix = 1,
   1638 		.cooperativeMatrixRobustBufferAccess = 0,
   1639 	};
   1640 	if (vk->gpu_info.cooperative_matrix) {
   1641 		coop_mat_features.pNext = (void *)device_create_info.pNext;
   1642 		device_create_info.pNext = &coop_mat_features;
   1643 	}
   1644 
   1645 	VkPhysicalDeviceRobustness2FeaturesKHR robust2 = {
   1646 		.sType          = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_KHR,
   1647 		.pNext          = (void *)device_create_info.pNext,
   1648 		.nullDescriptor = 1,
   1649 	};
   1650 	device_create_info.pNext = &robust2;
   1651 
   1652 	VkPhysicalDeviceVulkan13Features v13f = {
   1653 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   1654 		.pNext = (void *)device_create_info.pNext,
   1655 		#define X(name, ...) .name = 1,
   1656 		VK_REQUIRED_PHYSICAL_13_FEATURES
   1657 		#undef X
   1658 	};
   1659 	device_create_info.pNext = &v13f;
   1660 
   1661 	VkPhysicalDeviceVulkan12Features v12f = {
   1662 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   1663 		.pNext = (void *)device_create_info.pNext,
   1664 		#define X(name, ...) .name = 1,
   1665 		VK_REQUIRED_PHYSICAL_12_FEATURES
   1666 		#undef X
   1667 	};
   1668 	device_create_info.pNext = &v12f;
   1669 
   1670 	VkPhysicalDeviceVulkan11Features v11f = {
   1671 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   1672 		.pNext = (void *)device_create_info.pNext,
   1673 		#define X(name, ...) .name = 1,
   1674 		VK_REQUIRED_PHYSICAL_11_FEATURES
   1675 		#undef X
   1676 	};
   1677 	device_create_info.pNext = &v11f;
   1678 
   1679 	VkPhysicalDeviceFeatures2 device_features = {
   1680 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
   1681 		.pNext = (void *)device_create_info.pNext,
   1682 		.features = {
   1683 			#define X(name, ...) .name = 1,
   1684 			VK_REQUIRED_PHYSICAL_FEATURES
   1685 			#undef X
   1686 		},
   1687 	};
   1688 	device_create_info.pNext = &device_features;
   1689 
   1690 	vkCreateDevice(vk->physical_device, &device_create_info, 0, &vk->device);
   1691 
   1692 	#define X(name, ...) name = (name##_fn *)vkGetDeviceProcAddr(vk->device, #name);
   1693 	VkDeviceProcedureList
   1694 	#undef X
   1695 
   1696 	for (u32 q = 0; q < vk->unique_queues; q++) {
   1697 		VulkanQueue *qp = vk->queues[q];
   1698 		vkGetDeviceQueue(vk->device, qp->queue_family, qp->queue_index, &qp->queue);
   1699 
   1700 		qp->timeline_semaphore = vk_make_semaphore(0);
   1701 	}
   1702 
   1703 	vk->queues[VulkanQueueKind_Graphics]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT;
   1704 	vk->queues[VulkanQueueKind_Compute]->pipeline_stage_flags  |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
   1705 
   1706 	for EachElement(vk->command_pools, it) {
   1707 		VulkanCommandPool *vcp = vk->command_pools[it];
   1708 
   1709 		VkCommandPoolCreateInfo command_pool_create_info = {
   1710 			.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
   1711 			.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   1712 			.queueFamilyIndex = vk->queues[it]->queue_family,
   1713 		};
   1714 
   1715 		vkCreateCommandPool(vk->device, &command_pool_create_info, 0, &vcp->handle);
   1716 
   1717 		VkCommandBufferAllocateInfo command_buffer_allocate_info = {
   1718 			.sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
   1719 			.commandPool        = vcp->handle,
   1720 			.level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
   1721 			.commandBufferCount = countof(vcp->buffers),
   1722 		};
   1723 		vkAllocateCommandBuffers(vk->device, &command_buffer_allocate_info, vcp->buffers);
   1724 
   1725 		VkQueryPoolCreateInfo query_pool_create_info = {
   1726 			.sType      = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
   1727 			.queryType  = VK_QUERY_TYPE_TIMESTAMP,
   1728 			.queryCount = MaxCommandBuffersInFlight * MaxCommandBufferTimestamps,
   1729 		};
   1730 		vkCreateQueryPool(vk->device, &query_pool_create_info, 0, &vcp->query_pool);
   1731 	}
   1732 }
   1733 
   1734 function void
   1735 vk_load_graphics(void)
   1736 {
   1737 	VulkanContext *vk = vulkan_context;
   1738 
   1739 	// NOTE: swap chain image format
   1740 	{
   1741 	}
   1742 
   1743 	// NOTE: depth/stencil format
   1744 	{
   1745 		VkFormat depth_formats[] = {
   1746 			VK_FORMAT_D32_SFLOAT_S8_UINT,
   1747 			VK_FORMAT_D24_UNORM_S8_UINT,
   1748 			VK_FORMAT_D16_UNORM_S8_UINT,
   1749 		};
   1750 
   1751 		vk->depth_stencil_format = VK_FORMAT_UNDEFINED;
   1752 		for EachElement(depth_formats, it) {
   1753 			VkFormatProperties3 format_properties3 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3};
   1754 			VkFormatProperties2 format_properties2 = {
   1755 				.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
   1756 				.pNext = &format_properties3,
   1757 			};
   1758 			vkGetPhysicalDeviceFormatProperties2(vk->physical_device, depth_formats[it], &format_properties2);
   1759 			if (format_properties3.optimalTilingFeatures & VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT) {
   1760 				vk->depth_stencil_format = depth_formats[it];
   1761 				break;
   1762 			}
   1763 		}
   1764 	}
   1765 }
   1766 
   1767 function void
   1768 vk_load_descriptor_block(void)
   1769 {
   1770 	// NOTE(rnp):
   1771 	// * One Descriptor Pool
   1772 	// * One Descriptor Set Per Resource Kind
   1773 	// * Shaders know the ResourceKind enumeration
   1774 	// * Shaders know the per set binding points
   1775 
   1776 	VulkanContext *vk = vulkan_context;
   1777 
   1778 	// NOTE(rnp): Pool
   1779 	VkDescriptorPoolSize pool_sizes[] = {
   1780 		{
   1781 			.type            = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
   1782 			.descriptorCount = BeamformerShaderBufferSlot_Count,
   1783 		},
   1784 	};
   1785 	static_assert(countof(pool_sizes) == BeamformerShaderResourceKind_Count, "");
   1786 
   1787 	VkDescriptorPoolCreateInfo pool_create_info = {
   1788 		.sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
   1789 		.maxSets       = BeamformerShaderResourceKind_Count,
   1790 		.poolSizeCount = countof(pool_sizes),
   1791 		.pPoolSizes    = pool_sizes,
   1792 	};
   1793 
   1794 	vkCreateDescriptorPool(vk->device, &pool_create_info, 0, &vk->descriptor_pool);
   1795 
   1796 	// NOTE(rnp): Set Layouts
   1797 	VkDescriptorSetLayoutCreateInfo layout_create_info = {
   1798 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
   1799 	};
   1800 
   1801 	{
   1802 		VkDescriptorSetLayoutBinding layout_bindings[BeamformerShaderBufferSlot_Count];
   1803 		for EachEnumValue(BeamformerShaderBufferSlot, it) {
   1804 			layout_bindings[it] = (VkDescriptorSetLayoutBinding){
   1805 				.binding         = it,
   1806 				.descriptorType  = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
   1807 				.descriptorCount = 1,
   1808 				.stageFlags      = VK_SHADER_STAGE_ALL,
   1809 			};
   1810 		}
   1811 		layout_create_info.bindingCount = countof(layout_bindings),
   1812 		layout_create_info.pBindings    = layout_bindings,
   1813 		vkCreateDescriptorSetLayout(vk->device, &layout_create_info, 0,
   1814 		                            vk->descriptor_set_layouts + BeamformerShaderResourceKind_Buffer);
   1815 	}
   1816 
   1817 	// NOTE(rnp): Sets
   1818 	VkDescriptorSetAllocateInfo set_allocate_info = {
   1819 		.sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
   1820 		.descriptorPool     = vk->descriptor_pool,
   1821 		.descriptorSetCount = countof(vk->descriptor_sets),
   1822 		.pSetLayouts        = vk->descriptor_set_layouts,
   1823 	};
   1824 	static_assert(countof(vk->descriptor_set_layouts) == countof(vk->descriptor_sets), "");
   1825 	vkAllocateDescriptorSets(vk->device, &set_allocate_info, vk->descriptor_sets);
   1826 
   1827 	vk_label_object(DESCRIPTOR_POOL, vk->descriptor_pool, s8("Beamformer Resources"), s8("Pool"));
   1828 
   1829 	DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) {
   1830 		Arena scratch = vk->arena;
   1831 		for EachElement(vk->descriptor_sets, it) {
   1832 			Stream sb = arena_stream(scratch);
   1833 			stream_append_s8s(&sb, s8("Beamformer "), beamformer_shader_resource_kind_strings[it], s8("s"));
   1834 			vk_label_object(DESCRIPTOR_SET,        vk->descriptor_sets[it],        stream_to_s8(&sb), s8("Set"));
   1835 			vk_label_object(DESCRIPTOR_SET_LAYOUT, vk->descriptor_set_layouts[it], stream_to_s8(&sb), s8("Set Layout"));
   1836 		}
   1837 	}
   1838 
   1839 	// NOTE(rnp): junk API requirement that doesn't allow 0 initialization
   1840 	for EachElement(vk->descriptor_buffer_infos, it)
   1841 		vk->descriptor_buffer_infos[it].range = VK_WHOLE_SIZE;
   1842 }
   1843 
   1844 ///////////////////////
   1845 // NOTE(rnp): User API
   1846 
   1847 DEBUG_IMPORT void
   1848 vk_load(OSLibrary vulkan_library_handle, Arena *memory, Stream *err)
   1849 {
   1850 	#define X(name, ...) name = (name##_fn *)os_lookup_symbol(vulkan_library_handle, #name);
   1851 	VkLoaderProcedureList
   1852 	#undef X
   1853 
   1854 	if (!vkGetInstanceProcAddr) {
   1855 		stream_append_s8(err, vulkan_info("fatal error: failed to find \"vkGetInstanceProcAddr\"\n"));
   1856 		fatal(stream_to_s8(err));
   1857 	}
   1858 
   1859 	VulkanContext *vk = vulkan_context;
   1860 	vk->entity_arena = sub_arena_end(memory, KB(64), KB(4));
   1861 	vk->arena        = sub_arena_end(memory, KB(96), KB(4));
   1862 
   1863 	vk_load_instance(vk->arena, err);
   1864 	vk_load_physical_device(vk->arena, err);
   1865 	vk_load_queues(&vk->arena, err);
   1866 	vk_load_graphics();
   1867 	vk_load_descriptor_block();
   1868 
   1869 	read_only local_persist s8 default_compute_shader = s8(""
   1870 		"#version 430 core\n"
   1871 		"layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
   1872 		"void main() {}\n"
   1873 		"\n");
   1874 	vk->default_compute_pipeline = vk_compute_pipeline_from_shader_text(vk->arena, default_compute_shader,
   1875 	                                                                    s8("error_compute_shader"), 256);
   1876 
   1877 	read_only local_persist s8 default_vertex_shader = s8(""
   1878 		"#version 430 core\n"
   1879 		"layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
   1880 		"void main() {gl_Position = vec4(0);}\n"
   1881 		"\n");
   1882 	read_only local_persist s8 default_fragment_shader = s8(""
   1883 		"#version 430 core\n"
   1884 		"layout(location = 0) out vec4 out_colour;"
   1885 		"layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
   1886 		"void main() {out_colour = vec4(0.5f, 0.0f, 0.5f, 1.0f);}\n"
   1887 		"\n");
   1888 
   1889 	VulkanPipelineCreateInfo pipeline_create_infos[2] = {
   1890 		{
   1891 			.kind = VulkanShaderKind_Vertex,
   1892 			.text = default_vertex_shader,
   1893 			.name = s8("error_vertex_shader"),
   1894 		},
   1895 		{
   1896 			.kind = VulkanShaderKind_Fragment,
   1897 			.text = default_fragment_shader,
   1898 			.name = s8("error_fragment_shader"),
   1899 		},
   1900 	};
   1901 	vk->default_graphics_pipeline = vk_graphics_pipeline_from_infos(vk->arena, pipeline_create_infos, 2, 256);
   1902 
   1903 	// TODO: setup ui render pipeline
   1904 
   1905 	if (err->widx > 0) {
   1906 		os_console_log(err->data, err->widx);
   1907 		stream_reset(err, 0);
   1908 	}
   1909 }
   1910 
   1911 DEBUG_IMPORT GPUInfo *
   1912 vk_gpu_info(void)
   1913 {
   1914 	return &vulkan_context->gpu_info;
   1915 }
   1916 
   1917 function void
   1918 vk_vulkan_buffer_release(VulkanBuffer *vb)
   1919 {
   1920 	VulkanContext *vk = vulkan_context;
   1921 	VulkanEntity  *e  = (VulkanEntity *)((u8 *)vb - offsetof(VulkanEntity, as));
   1922 	// TODO(rnp): this happens implicitly, probably just delete this if block
   1923 	if (vb->host_pointer)
   1924 		vkUnmapMemory(vk->device, vb->memory);
   1925 
   1926 	if (vb->buffer)
   1927 		vkDestroyBuffer(vk->device, vb->buffer, 0);
   1928 
   1929 	vk_release_memory(vb->memory, vb->memory_kind != VulkanMemoryKind_Host ? vb->memory_size : 0);
   1930 	vk_entity_release(e);
   1931 }
   1932 
   1933 DEBUG_IMPORT void
   1934 vk_buffer_release(GPUBuffer *b)
   1935 {
   1936 	if ValidVulkanHandle(b->handle)
   1937 		vk_vulkan_buffer_release(vk_entity_data(b->handle, VulkanEntityKind_Buffer));
   1938 	zero_struct(b);
   1939 }
   1940 
   1941 DEBUG_IMPORT void
   1942 vk_buffer_allocate(GPUBuffer *b, GPUBufferAllocateInfo *info)
   1943 {
   1944 	VulkanContext *vk = vulkan_context;
   1945 
   1946 	vk_buffer_release(b);
   1947 
   1948 	assert(info->size > 0);
   1949 
   1950 	VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Buffer);
   1951 	VulkanBufferAllocateInfo vulkan_buffer_allocate_info = {
   1952 		.gpu_buffer = b,
   1953 		.size       = (u64)info->size,
   1954 		.flags      = info->flags,
   1955 		.index_type = VK_INDEX_TYPE_NONE_KHR,
   1956 		.label      = info->label,
   1957 	};
   1958 
   1959 	u32 queue_index_hit_count[VulkanQueueKind_Count] = {0};
   1960 	for (u32 it = 0; it < info->timeline_count; it++)
   1961 		queue_index_hit_count[vk->queue_indices[info->timelines_used[it]]]++;
   1962 
   1963 	for EachElement(queue_index_hit_count, it) {
   1964 		if (queue_index_hit_count[it] > 0) {
   1965 			u32 index = vulkan_buffer_allocate_info.queue_family_count++;
   1966 			vulkan_buffer_allocate_info.queue_family_indices[index] = vk->queues[vk->queue_indices[it]]->queue_family;
   1967 		}
   1968 	}
   1969 
   1970 	if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) {
   1971 		b->handle.value[0] = (u64)e;
   1972 	} else {
   1973 		vk_entity_release(e);
   1974 	}
   1975 }
   1976 
   1977 DEBUG_IMPORT b32
   1978 vk_buffer_needs_sync(GPUBuffer *b)
   1979 {
   1980 	b32 result = 0;
   1981 	if ValidVulkanHandle(b->handle) {
   1982 		VulkanBuffer *vb = vk_entity_data(b->handle, VulkanEntityKind_Buffer);
   1983 
   1984 		// TODO(rnp): not correct check. need to check if we used transfer queue
   1985 		result = vb->memory_kind != VulkanMemoryKind_BAR;
   1986 	}
   1987 
   1988 	return result;
   1989 }
   1990 
   1991 DEBUG_IMPORT u64
   1992 vk_round_up_to_sync_size(u64 size, u64 min)
   1993 {
   1994 	iz  round  = (iz)Max(min, vulkan_context->memory_info.non_coherent_atom_size);
   1995 	u64 result = (u64)round_up_to((iz)size, round);
   1996 	return result;
   1997 }
   1998 
   1999 function force_inline void
   2000 vk_buffer_buffer_copy(VulkanBuffer *destination, VulkanBuffer *source, u64 destination_offset, u64 source_offset, u64 size, b32 non_temporal)
   2001 {
   2002 	VulkanContext *vk = vulkan_context;
   2003 
   2004 	switch (source->memory_kind) {
   2005 	case VulkanMemoryKind_BAR:
   2006 	{
   2007 		switch (destination->memory_kind) {
   2008 		case VulkanMemoryKind_Host:{
   2009 			if (destination->memory) {
   2010 				// TODO(rnp): there is likely a more efficient way of doing this in this case
   2011 				InvalidCodePath;
   2012 			} else {
   2013 				assert(source->host_pointer);
   2014 				b32 coherent = vk->memory_info.memory_host_coherent[source->memory_kind];
   2015 				if (!coherent) {
   2016 					u64 nca_size = vk->memory_info.non_coherent_atom_size;
   2017 					VkMappedMemoryRange mrs[1] = {{
   2018 						.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
   2019 						.memory = source->memory,
   2020 						.offset = source_offset - (source_offset % nca_size),
   2021 						.size   = vk_round_up_to_sync_size(size, nca_size),
   2022 					}};
   2023 					vkInvalidateMappedMemoryRanges(vk->device, countof(mrs), mrs);
   2024 				}
   2025 
   2026 				void *dest = (u8 *)destination->host_pointer + destination_offset;
   2027 				void *src  = (u8 *)source->host_pointer + source_offset;
   2028 
   2029 				// NOTE(rnp): don't trash the CPU cache for large data stores
   2030 				if (non_temporal) memory_copy_non_temporal(dest, src, size);
   2031 				else              mem_copy(dest, src, size);
   2032 			}
   2033 		}break;
   2034 		InvalidDefaultCase;
   2035 		}
   2036 	}break;
   2037 
   2038 	case VulkanMemoryKind_Host:{
   2039 		switch (destination->memory_kind) {
   2040 		case VulkanMemoryKind_BAR:{
   2041 			assert(destination->host_pointer);
   2042 
   2043 			void *dest = (u8 *)destination->host_pointer + destination_offset;
   2044 			void *src  = (u8 *)source->host_pointer + source_offset;
   2045 
   2046 			// NOTE(rnp): don't trash the CPU cache for large data stores
   2047 			if (non_temporal) memory_copy_non_temporal(dest, src, size);
   2048 			else              mem_copy(dest, src, size);
   2049 
   2050 			b32 coherent = vk->memory_info.memory_host_coherent[destination->memory_kind];
   2051 			if (!coherent) {
   2052 				u64 nca_size = vk->memory_info.non_coherent_atom_size;
   2053 				VkMappedMemoryRange mrs[1] = {{
   2054 					.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
   2055 					.memory = destination->memory,
   2056 					.offset = destination_offset - (destination_offset % nca_size),
   2057 					.size   = vk_round_up_to_sync_size(size, nca_size),
   2058 				}};
   2059 				vkFlushMappedMemoryRanges(vk->device, countof(mrs), mrs);
   2060 			}
   2061 		}break;
   2062 		InvalidDefaultCase;
   2063 
   2064 		}
   2065 	}break;
   2066 
   2067 	// TODO(rnp): use transfer queue when not mapped
   2068 	InvalidDefaultCase;
   2069 	}
   2070 }
   2071 
   2072 DEBUG_IMPORT void
   2073 vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_temporal)
   2074 {
   2075 	VulkanBuffer *db = vk_entity_data(b->handle, VulkanEntityKind_Buffer);
   2076 	VulkanBuffer  sb = {
   2077 		.host_pointer = data,
   2078 		.memory_kind  = VulkanMemoryKind_Host,
   2079 	};
   2080 	vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal);
   2081 }
   2082 
   2083 DEBUG_IMPORT void
   2084 vk_buffer_range_download(void *destination, GPUBuffer *source, u64 offset, u64 size, b32 non_temporal)
   2085 {
   2086 	VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer);
   2087 	VulkanBuffer  db = {
   2088 		.host_pointer = destination,
   2089 		.memory_kind  = VulkanMemoryKind_Host,
   2090 	};
   2091 	vk_buffer_buffer_copy(&db, sb, 0, offset, size, non_temporal);
   2092 }
   2093 
   2094 DEBUG_IMPORT void
   2095 vk_render_model_release(GPUBuffer *model)
   2096 {
   2097 	if ValidVulkanHandle(model->handle)
   2098 		vk_vulkan_buffer_release(vk_entity_data(model->handle, VulkanEntityKind_RenderModel));
   2099 	zero_struct(model);
   2100 }
   2101 
   2102 DEBUG_IMPORT void
   2103 vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 model_size, s8 label)
   2104 {
   2105 	vk_render_model_release(model);
   2106 
   2107 	VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_RenderModel);
   2108 
   2109 	assert(index_count <= U32_MAX);
   2110 	VkIndexType index_type;
   2111 	if (index_count <= U16_MAX) index_type = VK_INDEX_TYPE_UINT16;
   2112 	else                        index_type = VK_INDEX_TYPE_UINT32;
   2113 
   2114 	i64 indices_size = round_up_to(vk_index_size(index_type) * index_count, 64);
   2115 
   2116 	i64 size = round_up_to(model_size + indices_size, 64);
   2117 	assert(size > 0);
   2118 
   2119 	VulkanBufferAllocateInfo vulkan_buffer_allocate_info = {
   2120 		.gpu_buffer              = model,
   2121 		.size                    = (u64)size,
   2122 		.flags                   = VulkanUsageFlag_HostReadWrite,
   2123 		.index_type              = index_type,
   2124 		.label                   = label,
   2125 		.queue_family_count      = 1,
   2126 		.queue_family_indices[0] = vulkan_context->queues[VulkanQueueKind_Graphics]->queue_family,
   2127 	};
   2128 	if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) {
   2129 		model->handle.value[0] = (u64)e;
   2130 		model->index_count  = index_count;
   2131 		model->gpu_pointer += indices_size;
   2132 
   2133 		VulkanBuffer  sb = {
   2134 			.host_pointer = indices,
   2135 			.memory_kind  = VulkanMemoryKind_Host,
   2136 		};
   2137 
   2138 		vk_buffer_buffer_copy(&e->as.buffer, &sb, 0, 0, vk_index_size(index_type) * index_count, 0);
   2139 	} else {
   2140 		vk_entity_release(e);
   2141 	}
   2142 }
   2143 
   2144 DEBUG_IMPORT void
   2145 vk_render_model_range_upload(GPUBuffer *model, void *data, u64 offset, u64 size, b32 non_temporal)
   2146 {
   2147 	VulkanBuffer *db = vk_entity_data(model->handle, VulkanEntityKind_RenderModel);
   2148 	VulkanBuffer  sb = {
   2149 		.host_pointer = data,
   2150 		.memory_kind  = VulkanMemoryKind_Host,
   2151 	};
   2152 
   2153 	offset += round_up_to(vk_index_size(db->index_type) * model->index_count, 64);
   2154 
   2155 	vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal);
   2156 }
   2157 
   2158 DEBUG_IMPORT void
   2159 vk_image_release(GPUImage *image)
   2160 {
   2161 	if ValidVulkanHandle(image->image) {
   2162 		VulkanContext *vk = vulkan_context;
   2163 		VulkanImage   *vi = vk_entity_data(image->image, VulkanEntityKind_Image);
   2164 
   2165 		vkDestroyImageView(vk->device, vi->view, 0);
   2166 		vkDestroyImage(vk->device, vi->image, 0);
   2167 		vk_release_memory(vi->memory, image->memory_size);
   2168 
   2169 		vk_entity_release((VulkanEntity *)image->image.value[0]);
   2170 	}
   2171 	zero_struct(image);
   2172 }
   2173 
   2174 DEBUG_IMPORT void
   2175 vk_image_allocate(GPUImage *image, u32 width, u32 height, u32 mips, u32 samples,
   2176                   VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export, s8 label)
   2177 {
   2178 	assert(IsPowerOfTwo(samples));
   2179 
   2180 	vk_image_release(image);
   2181 
   2182 	VulkanContext *vk = vulkan_context;
   2183 	VulkanEntity  *e  = vk_entity_allocate(VulkanEntityKind_Image);
   2184 	VulkanImage   *vi = &e->as.image;
   2185 
   2186 	image->image.value[0] = (u64)e;
   2187 	image->width          = Min(width,   vk->gpu_info.max_image_dimension_2D);
   2188 	image->height         = Min(height,  vk->gpu_info.max_image_dimension_2D);
   2189 	image->mip_map_levels = Max(mips,    1);
   2190 	image->samples        = Min(samples, vk->gpu_info.max_msaa_samples);
   2191 
   2192 	VkFormat usage_format_map[VulkanImageUsage_Count + 1] = {
   2193 		[VulkanImageUsage_None]         = VK_FORMAT_UNDEFINED,
   2194 		//[VulkanImageUsage_Colour]       = VK_FORMAT_R8G8B8A8_SRGB,
   2195 		[VulkanImageUsage_Colour]       = VK_FORMAT_R8G8B8A8_UNORM,
   2196 		[VulkanImageUsage_DepthStencil] = vk->depth_stencil_format,
   2197 		[VulkanImageUsage_Count]        = VK_FORMAT_UNDEFINED,
   2198 	};
   2199 
   2200 	read_only local_persist VkImageUsageFlagBits usage_extra_bit_map[VulkanImageUsage_Count + 1] = {
   2201 		[VulkanImageUsage_None]         = 0,
   2202 		[VulkanImageUsage_Colour]       = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
   2203 		[VulkanImageUsage_DepthStencil] = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
   2204 		[VulkanImageUsage_Count]        = 0,
   2205 	};
   2206 
   2207 	read_only local_persist VkImageAspectFlags usage_image_aspect_map[VulkanImageUsage_Count + 1] = {
   2208 		[VulkanImageUsage_None]         = 0,
   2209 		[VulkanImageUsage_Colour]       = VK_IMAGE_ASPECT_COLOR_BIT,
   2210 		[VulkanImageUsage_DepthStencil] = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT,
   2211 		[VulkanImageUsage_Count]        = 0,
   2212 	};
   2213 
   2214 	usage = Clamp((u32)usage, 0, VulkanImageUsage_Count);
   2215 	VkImageUsageFlagBits usage_flags = usage_extra_bit_map[usage];
   2216 
   2217 	if (flags & VulkanUsageFlag_ImageSampling)       usage_flags |= VK_IMAGE_USAGE_SAMPLED_BIT;
   2218 	if (flags & VulkanUsageFlag_TransferSource)      usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
   2219 	if (flags & VulkanUsageFlag_TransferDestination) usage_flags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
   2220 
   2221 	u32 queue_family = vk->queues[VulkanQueueKind_Graphics]->queue_family;
   2222 	VkImageCreateInfo image_create_info = {
   2223 		.sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
   2224 		.flags                 = export ? VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT : 0,
   2225 		.imageType             = VK_IMAGE_TYPE_2D,
   2226 		.format                = usage_format_map[usage],
   2227 		.extent                = {image->width, image->height, 1},
   2228 		.mipLevels             = image->mip_map_levels,
   2229 		.arrayLayers           = 1,
   2230 		.samples               = image->samples,
   2231 		.tiling                = VK_IMAGE_TILING_OPTIMAL,
   2232 		.usage                 = usage_flags,
   2233 		// NOTE(rnp): needed if multiple queue families are accessed
   2234 		.sharingMode           = VK_SHARING_MODE_EXCLUSIVE,
   2235 		.queueFamilyIndexCount = 1,
   2236 		.pQueueFamilyIndices   = &queue_family,
   2237 		.initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
   2238 	};
   2239 
   2240 	VkExternalMemoryImageCreateInfo external_memory_image_create_info = {
   2241 		.sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
   2242 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
   2243 		                          : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
   2244 	};
   2245 
   2246 	if (export) image_create_info.pNext = &external_memory_image_create_info;
   2247 
   2248 	vkCreateImage(vk->device, &image_create_info, 0, &vi->image);
   2249 
   2250 	VkMemoryRequirements memory_requirements;
   2251 	vkGetImageMemoryRequirements(vk->device, vi->image, &memory_requirements);
   2252 
   2253 	VkMemoryDedicatedAllocateInfo dedicated_allocate_info = {
   2254 		.sType  = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
   2255 		.image  = vi->image,
   2256 	};
   2257 
   2258 	if (vk_allocate_memory(&vi->memory, memory_requirements.size, VulkanMemoryKind_Device, 0, &dedicated_allocate_info, export)) {
   2259 		image->memory_size = memory_requirements.size;
   2260 		vkBindImageMemory(vk->device, vi->image, vi->memory, 0);
   2261 
   2262 		VkImageViewCreateInfo image_view_info = {
   2263 			.sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
   2264 			.image      = vi->image,
   2265 			.viewType   = VK_IMAGE_VIEW_TYPE_2D,
   2266 			.format     = usage_format_map[usage],
   2267 			.subresourceRange = {
   2268 				.aspectMask     = usage_image_aspect_map[usage],
   2269 				.baseMipLevel   = 0,
   2270 				.levelCount     = 1,
   2271 				.baseArrayLayer = 0,
   2272 				.layerCount     = 1,
   2273 			},
   2274 		};
   2275 		vkCreateImageView(vk->device, &image_view_info, 0, &vi->view);
   2276 
   2277 		vk_label_object(IMAGE,         vi->image,  label, s8("Image"));
   2278 		vk_label_object(IMAGE_VIEW,    vi->view,   label, s8("Image View"));
   2279 		vk_label_object(DEVICE_MEMORY, vi->memory, label, s8("Memory"));
   2280 	} else {
   2281 		vkDestroyImage(vk->device, vi->image, 0);
   2282 		vk_entity_release(e);
   2283 		zero_struct(image);
   2284 	}
   2285 }
   2286 
   2287 DEBUG_IMPORT VulkanHandle
   2288 vk_create_semaphore(OSHandle *export)
   2289 {
   2290 	VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Semaphore);
   2291 	e->as.semaphore = vk_make_semaphore(export);
   2292 	VulkanHandle result = {(u64)e};
   2293 	return result;
   2294 }
   2295 
   2296 DEBUG_IMPORT b32
   2297 vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns)
   2298 {
   2299 	b32 result = 0;
   2300 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2301 		VulkanContext *vk = vulkan_context;
   2302 		VulkanQueue   *vq = vk->queues[timeline];
   2303 		VkSemaphoreWaitInfo semaphore_wait_info = {
   2304 			.sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
   2305 			.pSemaphores    = &vq->timeline_semaphore.semaphore,
   2306 			.semaphoreCount = 1,
   2307 			.pValues        = &value,
   2308 		};
   2309 		result = vkWaitSemaphores(vk->device, &semaphore_wait_info, timeout_ns) == VK_SUCCESS;
   2310 	}
   2311 	return result;
   2312 }
   2313 
   2314 DEBUG_IMPORT u64
   2315 vk_host_signal_timeline(VulkanTimeline timeline)
   2316 {
   2317 	u64 result = -1;
   2318 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2319 		VulkanContext   *vk = vulkan_context;
   2320 		VulkanQueue     *vq = vk->queues[timeline];
   2321 		VulkanSemaphore *vs = &vq->timeline_semaphore;
   2322 		result = ++vs->value;
   2323 		VkSemaphoreSignalInfo ssi = {
   2324 			.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
   2325 			.semaphore = vs->semaphore,
   2326 			.value     = result,
   2327 		};
   2328 		vkSignalSemaphore(vk->device, &ssi);
   2329 	}
   2330 	return result;
   2331 }
   2332 
   2333 DEBUG_IMPORT VulkanHandle
   2334 vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size)
   2335 {
   2336 	assert(Between(count, 1, 2));
   2337 	assert(count == 2 || infos[0].kind == VulkanShaderKind_Compute);
   2338 
   2339 	VulkanHandle result = {0};
   2340 	DeferLoop(take_lock(&vulkan_context->arena_lock, -1), release_lock(&vulkan_context->arena_lock))
   2341 	{
   2342 		Arena arena = vulkan_context->arena;
   2343 
   2344 		VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Pipeline);
   2345 		result = (VulkanHandle){(u64)e};
   2346 
   2347 		if (count == 2) e->as.pipeline = vk_graphics_pipeline_from_infos(arena, infos, count, push_constants_size);
   2348 		else            e->as.pipeline = vk_compute_pipeline_from_shader_text(arena, infos[0].text, infos[0].name, push_constants_size);
   2349 	}
   2350 	return result;
   2351 }
   2352 
   2353 DEBUG_IMPORT b32
   2354 vk_pipeline_valid(VulkanHandle h)
   2355 {
   2356 	b32 result = 0;
   2357 	if ValidVulkanHandle(h) {
   2358 		VulkanPipeline *vp = vk_entity_data(h, VulkanEntityKind_Pipeline);
   2359 		if (vp->stage_flags == VK_SHADER_STAGE_COMPUTE_BIT)
   2360 			result = vp->pipeline != vulkan_context->default_compute_pipeline.pipeline;
   2361 		else
   2362 			result = vp->pipeline != vulkan_context->default_graphics_pipeline.pipeline;
   2363 	}
   2364 	return result;
   2365 }
   2366 
   2367 DEBUG_IMPORT void
   2368 vk_pipeline_release(VulkanHandle h)
   2369 {
   2370 	if (vk_pipeline_valid(h)) {
   2371 		VulkanEntity *e = (VulkanEntity *)h.value[0];
   2372 		VulkanTimeline timeline;
   2373 		if (e->as.pipeline.stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) timeline = VulkanTimeline_Compute;
   2374 		else                                                           timeline = VulkanTimeline_Graphics;
   2375 
   2376 		// NOTE(rnp): block more command buffers from being recorded
   2377 		VulkanCommandPool *vcp = vulkan_context->command_pools[timeline];
   2378 		DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) {
   2379 			u32 index = (vcp->next_index - 1) % countof(vcp->buffers);
   2380 			vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);
   2381 			vkDestroyPipeline(vulkan_context->device, e->as.pipeline.pipeline, 0);
   2382 			vkDestroyPipelineLayout(vulkan_context->device, e->as.pipeline.layout, 0);
   2383 
   2384 			if (&e->as.pipeline == vcp->bound_pipeline)
   2385 				vcp->bound_pipeline = 0;
   2386 		}
   2387 		vk_entity_release(e);
   2388 	}
   2389 }
   2390 
   2391 DEBUG_IMPORT void
   2392 vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count)
   2393 {
   2394 	VulkanContext *vk = vulkan_context;
   2395 
   2396 	VkWriteDescriptorSet   write_sets[BeamformerShaderResourceKind_Count] = {0};
   2397 
   2398 	for EachIndex(info_count, it) {
   2399 		switch (infos[it].kind) {
   2400 		case BeamformerShaderResourceKind_Buffer:{
   2401 			VulkanBuffer *vb = vk_entity_data(infos[it].handle, VulkanEntityKind_Buffer);
   2402 			vk->descriptor_buffer_infos[infos[it].slot].buffer = vb->buffer;
   2403 			vk->descriptor_buffer_infos[infos[it].slot].offset = 0;
   2404 			vk->descriptor_buffer_infos[infos[it].slot].range  = vb->memory_size;
   2405 		}break;
   2406 
   2407 		InvalidDefaultCase;
   2408 		}
   2409 	}
   2410 
   2411 	write_sets[BeamformerShaderResourceKind_Buffer].sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
   2412 	write_sets[BeamformerShaderResourceKind_Buffer].dstSet           = vk->descriptor_sets[BeamformerShaderResourceKind_Buffer];
   2413 	write_sets[BeamformerShaderResourceKind_Buffer].dstBinding       = 0;
   2414 	write_sets[BeamformerShaderResourceKind_Buffer].descriptorCount  = countof(vk->descriptor_buffer_infos);
   2415 	write_sets[BeamformerShaderResourceKind_Buffer].descriptorType   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
   2416 	write_sets[BeamformerShaderResourceKind_Buffer].pBufferInfo      = vk->descriptor_buffer_infos;
   2417 
   2418 	vkUpdateDescriptorSets(vk->device, countof(write_sets), write_sets, 0, 0);
   2419 }
   2420 
   2421 DEBUG_IMPORT VulkanHandle
   2422 vk_command_begin(VulkanTimeline timeline)
   2423 {
   2424 	VulkanHandle result = {0};
   2425 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2426 		VulkanContext     *vk  = vulkan_context;
   2427 		VulkanCommandPool *vcp = vk->command_pools[timeline];
   2428 
   2429 		take_lock(&vcp->lock, -1);
   2430 
   2431 		VulkanEntity        *e   = vk_entity_allocate(VulkanEntityKind_CommandBuffer);
   2432 		VulkanCommandBuffer *vcb = &e->as.command_buffer;
   2433 		vcb->timeline     = timeline;
   2434 		vcb->buffer_index = vcp->next_index++ % countof(vcp->buffers);
   2435 
   2436 		u32 index = vcb->buffer_index;
   2437 		// TODO(rnp): probably not the best to have this here but it will likely not be hit
   2438 		b32 wait_result = vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);
   2439 		assert(wait_result);
   2440 
   2441 		vcp->queries_occupied[index] = 0;
   2442 
   2443 		VkCommandBufferBeginInfo buffer_begin_info = {
   2444 			.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   2445 			.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
   2446 		};
   2447 
   2448 		vkBeginCommandBuffer(vcp->buffers[index], &buffer_begin_info);
   2449 		vkCmdResetQueryPool(vcp->buffers[index], vcp->query_pool, index * MaxCommandBufferTimestamps,
   2450 		                    MaxCommandBufferTimestamps);
   2451 
   2452 		result = (VulkanHandle){(u64)e};
   2453 	}
   2454 	return result;
   2455 }
   2456 
   2457 DEBUG_IMPORT void
   2458 vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline)
   2459 {
   2460 	if ValidVulkanHandle(command) {
   2461 		VulkanContext       *vk  = vulkan_context;
   2462 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2463 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2464 
   2465 		VulkanPipeline *vp = 0;
   2466 		if ValidVulkanHandle(pipeline) {
   2467 			vp = vk_entity_data(pipeline, VulkanEntityKind_Pipeline);
   2468 		} else if (vcb->timeline == VulkanTimeline_Compute) {
   2469 			vp = &vk->default_compute_pipeline;
   2470 		} else if (vcb->timeline == VulkanTimeline_Graphics) {
   2471 			vp = &vk->default_graphics_pipeline;
   2472 		} else {
   2473 			InvalidCodePath;
   2474 		}
   2475 
   2476 		read_only local_persist VkPipelineBindPoint bind_point_lut[VulkanTimeline_Count] = {
   2477 			[VulkanTimeline_Graphics] = VK_PIPELINE_BIND_POINT_GRAPHICS,
   2478 			[VulkanTimeline_Compute]  = VK_PIPELINE_BIND_POINT_COMPUTE,
   2479 			[VulkanTimeline_Transfer] = -1,
   2480 		};
   2481 
   2482 		VkPipelineBindPoint bind_point = bind_point_lut[vcb->timeline];
   2483 		assert(bind_point != (VkPipelineBindPoint)-1);
   2484 
   2485 		vkCmdBindPipeline(vcp->buffers[vcb->buffer_index], bind_point, vp->pipeline);
   2486 		vkCmdBindDescriptorSets(vcp->buffers[vcb->buffer_index], bind_point, vp->layout,
   2487 		                        0, countof(vk->descriptor_sets), vk->descriptor_sets, 0, 0);
   2488 		vcp->bound_pipeline = vp;
   2489 	}
   2490 }
   2491 
   2492 DEBUG_IMPORT void
   2493 vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count)
   2494 {
   2495 	if ValidVulkanHandle(command) {
   2496 		VulkanContext       *vk  = vulkan_context;
   2497 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2498 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2499 		VulkanQueue         *vq  = vk->queues[vcb->timeline];
   2500 
   2501 		DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock))
   2502 		{
   2503 			Arena arena = vk->arena;
   2504 			u32 valid_count = 0;
   2505 			VkBufferMemoryBarrier2 *memory_barriers = push_array(&arena, VkBufferMemoryBarrier2, count);
   2506 			for (u64 it = 0; it < count; it++) {
   2507 				if ValidVulkanHandle(barriers[it].gpu_buffer->handle) {
   2508 					u32           index = valid_count++;
   2509 					VulkanBuffer *vb    = vk_entity_data(barriers[it].gpu_buffer->handle, VulkanEntityKind_Buffer);
   2510 					memory_barriers[index].sType               = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2;
   2511 					memory_barriers[index].srcStageMask        = vq->pipeline_stage_flags;
   2512 					memory_barriers[index].srcAccessMask       = VK_ACCESS_2_MEMORY_WRITE_BIT;
   2513 					memory_barriers[index].dstStageMask        = vq->pipeline_stage_flags;
   2514 					memory_barriers[index].dstAccessMask       = VK_ACCESS_2_MEMORY_READ_BIT;
   2515 					memory_barriers[index].srcQueueFamilyIndex = vq->queue_family;
   2516 					memory_barriers[index].dstQueueFamilyIndex = vq->queue_family;
   2517 					memory_barriers[index].buffer              = vb->buffer;
   2518 					memory_barriers[index].offset              = barriers[it].offset;
   2519 					memory_barriers[index].size                = barriers[it].size;
   2520 				}
   2521 			}
   2522 
   2523 			VkDependencyInfo dependancy_info = {
   2524 				.sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
   2525 				.bufferMemoryBarrierCount = valid_count,
   2526 				.pBufferMemoryBarriers    = memory_barriers,
   2527 			};
   2528 
   2529 			vkCmdPipelineBarrier2(vcp->buffers[vcb->buffer_index], &dependancy_info);
   2530 		}
   2531 	}
   2532 }
   2533 
   2534 DEBUG_IMPORT void
   2535 vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch)
   2536 {
   2537 	assert(dispatch.x <= U16_MAX);
   2538 	assert(dispatch.y <= U16_MAX);
   2539 	assert(dispatch.z <= U16_MAX);
   2540 	if ValidVulkanHandle(command) {
   2541 		VkCommandBuffer cmd = vk_command_buffer(command);
   2542 		vkCmdDispatch(cmd, dispatch.x, dispatch.y, dispatch.z);
   2543 	}
   2544 }
   2545 
   2546 DEBUG_IMPORT void
   2547 vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values)
   2548 {
   2549 	if ValidVulkanHandle(command) {
   2550 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2551 		VulkanCommandPool   *vcp = vulkan_context->command_pools[vcb->timeline];
   2552 		VulkanPipeline      *vp  = vcp->bound_pipeline;
   2553 
   2554 		assert(vp);
   2555 
   2556 		vkCmdPushConstants(vcp->buffers[vcb->buffer_index], vp->layout, vp->stage_flags, offset, size, values);
   2557 	}
   2558 }
   2559 
   2560 DEBUG_IMPORT void
   2561 vk_command_timestamp(VulkanHandle command)
   2562 {
   2563 	if ValidVulkanHandle(command) {
   2564 		VulkanContext       *vk  = vulkan_context;
   2565 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2566 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2567 
   2568 		read_only local_persist VkPipelineStageFlags2 stage_lut[VulkanTimeline_Count] = {
   2569 			[VulkanTimeline_Graphics] = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT,
   2570 			[VulkanTimeline_Compute]  = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
   2571 			[VulkanTimeline_Transfer] = -1,
   2572 		};
   2573 
   2574 		VkPipelineStageFlags2 stage = stage_lut[vcb->timeline];
   2575 		assert(stage != (VkPipelineStageFlags2)-1);
   2576 
   2577 		if (vcp->queries_occupied[vcb->buffer_index] < MaxCommandBufferTimestamps) {
   2578 			u32 query_index = vcp->queries_occupied[vcb->buffer_index]++;
   2579 			vkCmdWriteTimestamp2(vcp->buffers[vcb->buffer_index], stage, vcp->query_pool,
   2580 			                     vcb->buffer_index * MaxCommandBufferTimestamps + query_index);
   2581 		}
   2582 	}
   2583 }
   2584 
   2585 DEBUG_IMPORT void
   2586 vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value)
   2587 {
   2588 	if (ValidVulkanHandle(command) && Between(timeline, 0, VulkanTimeline_Count - 1)) {
   2589 		VulkanContext       *vk  = vulkan_context;
   2590 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2591 
   2592 		u32 wait_index = vk->queue_indices[timeline];
   2593 		vcb->in_flight_wait_values[wait_index] = Max(value, vcb->in_flight_wait_values[wait_index]);
   2594 	}
   2595 }
   2596 
   2597 DEBUG_IMPORT u64
   2598 vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore)
   2599 {
   2600 	u64 result = -1;
   2601 	if ValidVulkanHandle(command) {
   2602 		VulkanContext       *vk  = vulkan_context;
   2603 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2604 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2605 		VulkanQueue         *vq  = vk->queues[vcb->timeline];
   2606 		VulkanSemaphore     *vs  = &vq->timeline_semaphore;
   2607 
   2608 		vkEndCommandBuffer(vcp->buffers[vcb->buffer_index]);
   2609 
   2610 		DeferLoop(take_lock(&vq->lock, -1), release_lock(&vq->lock)) {
   2611 			VkCommandBufferSubmitInfo command_buffer_submit_info = {
   2612 				.sType         = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
   2613 				.commandBuffer = vcp->buffers[vcb->buffer_index],
   2614 			};
   2615 
   2616 			result = ++vs->value;
   2617 
   2618 			u32 signal_submit_info_count = 1;
   2619 			VkSemaphoreSubmitInfo signal_submit_infos[2] = {{
   2620 				.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2621 				.semaphore = vs->semaphore,
   2622 				.value     = result,
   2623 				.stageMask = vq->pipeline_stage_flags,
   2624 			}};
   2625 
   2626 			if ValidVulkanHandle(finished_semaphore) {
   2627 				VulkanSemaphore *fs = vk_entity_data(finished_semaphore, VulkanEntityKind_Semaphore);
   2628 				signal_submit_infos[signal_submit_info_count++] = (VkSemaphoreSubmitInfo){
   2629 					.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2630 					.semaphore = fs->semaphore,
   2631 					.stageMask = vq->pipeline_stage_flags,
   2632 				};
   2633 			}
   2634 
   2635 			u32 wait_submit_info_count = 0;
   2636 			VkSemaphoreSubmitInfo wait_submit_infos[VulkanQueueKind_Count + 1];
   2637 			for (u32 i = 0; i < vk->unique_queues; i++) {
   2638 				u32 queue_index = vk->queue_indices[i];
   2639 				if (vcb->in_flight_wait_values[queue_index] > 0) {
   2640 					VulkanQueue *q = vk->queues[queue_index];
   2641 					VkSemaphoreSubmitInfo wait_ssi = {
   2642 						.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2643 						.semaphore = q->timeline_semaphore.semaphore,
   2644 						.value     = vcb->in_flight_wait_values[queue_index],
   2645 						.stageMask = q->pipeline_stage_flags,
   2646 					};
   2647 					wait_submit_infos[wait_submit_info_count++] = wait_ssi;
   2648 				}
   2649 			}
   2650 
   2651 			if ValidVulkanHandle(wait_semaphore) {
   2652 				VulkanSemaphore *ws = vk_entity_data(wait_semaphore, VulkanEntityKind_Semaphore);
   2653 				wait_submit_infos[wait_submit_info_count++] = (VkSemaphoreSubmitInfo){
   2654 					.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2655 					.semaphore = ws->semaphore,
   2656 					.stageMask = vq->pipeline_stage_flags,
   2657 				};
   2658 			}
   2659 
   2660 			VkSubmitInfo2 submit_info = {
   2661 				.sType                    = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
   2662 				.commandBufferInfoCount   = 1,
   2663 				.pCommandBufferInfos      = &command_buffer_submit_info,
   2664 				.waitSemaphoreInfoCount   = wait_submit_info_count,
   2665 				.pWaitSemaphoreInfos      = wait_submit_infos,
   2666 				.signalSemaphoreInfoCount = signal_submit_info_count,
   2667 				.pSignalSemaphoreInfos    = signal_submit_infos,
   2668 			};
   2669 
   2670 			vkQueueSubmit2(vq->queue, 1, &submit_info, 0);
   2671 
   2672 			vcp->bound_pipeline = 0;
   2673 
   2674 			atomic_store_u64(vcp->submission_values + vcb->buffer_index, result);
   2675 		}
   2676 
   2677 		release_lock(&vcp->lock);
   2678 
   2679 		vk_entity_release((VulkanEntity *)command.value[0]);
   2680 	}
   2681 	return result;
   2682 }
   2683 
   2684 DEBUG_IMPORT void
   2685 vk_command_begin_rendering(VulkanHandle command, GPUImage *colour, GPUImage *depth, GPUImage *resolve)
   2686 {
   2687 	if ValidVulkanHandle(command) {
   2688 		VkCommandBuffer cmd = vk_command_buffer(command);
   2689 
   2690 		assert((colour->width == depth->width) && (colour->height == depth->height));
   2691 
   2692 		VulkanImage *ci = vk_entity_data(colour->image, VulkanEntityKind_Image);
   2693 		VulkanImage *di = vk_entity_data(depth->image,  VulkanEntityKind_Image);
   2694 		VulkanImage *ri = 0;
   2695 		if (resolve) ri = vk_entity_data(resolve->image, VulkanEntityKind_Image);
   2696 
   2697 		// NOTE: Layout Transitions
   2698 		{
   2699 			u32 image_memory_barrier_count = 2;
   2700 			VkImageMemoryBarrier2 image_memory_barriers[3] = {
   2701 				{
   2702 					.sType            = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
   2703 					.srcStageMask     = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
   2704 					.srcAccessMask    = 0,
   2705 					.dstStageMask     = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
   2706 					.dstAccessMask    = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
   2707 					.oldLayout        = VK_IMAGE_LAYOUT_UNDEFINED,
   2708 					.newLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2709 					.image            = ci->image,
   2710 					.subresourceRange = {
   2711 						.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
   2712 						.baseMipLevel   = 0,
   2713 						.levelCount     = 1,
   2714 						.baseArrayLayer = 0,
   2715 						.layerCount     = 1,
   2716 					},
   2717 				},
   2718 				{
   2719 					.sType            = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
   2720 					.srcStageMask     = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
   2721 					.srcAccessMask    = 0,
   2722 					.dstStageMask     = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
   2723 					.dstAccessMask    = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
   2724 					.oldLayout        = VK_IMAGE_LAYOUT_UNDEFINED,
   2725 					.newLayout        = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
   2726 					.image            = di->image,
   2727 					.subresourceRange = {
   2728 						.aspectMask     = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT,
   2729 						.baseMipLevel   = 0,
   2730 						.levelCount     = 1,
   2731 						.baseArrayLayer = 0,
   2732 						.layerCount     = 1,
   2733 					},
   2734 				},
   2735 			};
   2736 
   2737 			if (resolve) image_memory_barriers[image_memory_barrier_count++] = (VkImageMemoryBarrier2){
   2738 				.sType            = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
   2739 				.srcStageMask     = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
   2740 				.srcAccessMask    = 0,
   2741 				.dstStageMask     = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT|VK_PIPELINE_STAGE_2_RESOLVE_BIT,
   2742 				.dstAccessMask    = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
   2743 				.oldLayout        = VK_IMAGE_LAYOUT_UNDEFINED,
   2744 				.newLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2745 				.image            = ri->image,
   2746 				.subresourceRange = {
   2747 					.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
   2748 					.baseMipLevel   = 0,
   2749 					.levelCount     = 1,
   2750 					.baseArrayLayer = 0,
   2751 					.layerCount     = 1,
   2752 				},
   2753 			};
   2754 
   2755 			VkDependencyInfo dependency_info = {
   2756 				.sType                   = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
   2757 				.imageMemoryBarrierCount = image_memory_barrier_count,
   2758 				.pImageMemoryBarriers    = image_memory_barriers,
   2759 			};
   2760 
   2761 			vkCmdPipelineBarrier2(cmd, &dependency_info);
   2762 		}
   2763 
   2764 		VkRenderingAttachmentInfo colour_attachment = {
   2765 			.sType              = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
   2766 			.imageView          = ci->view,
   2767 			.imageLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2768 			.resolveMode        = ri ? VK_RESOLVE_MODE_AVERAGE_BIT : 0,
   2769 			.resolveImageView   = ri ? ri->view : 0,
   2770 			.resolveImageLayout = ri ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : 0,
   2771 			.loadOp             = VK_ATTACHMENT_LOAD_OP_CLEAR,
   2772 			.storeOp            = VK_ATTACHMENT_STORE_OP_STORE,
   2773 			.clearValue         = {.color = {{0.0f, 0.0f, 0.0f, 0.0f}}},
   2774 		};
   2775 
   2776 		VkRenderingAttachmentInfo depth_stencil_attachment = {
   2777 			.sType       = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
   2778 			.imageView   = di->view,
   2779 			.imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
   2780 			.loadOp      = VK_ATTACHMENT_LOAD_OP_CLEAR,
   2781 			.storeOp     = VK_ATTACHMENT_STORE_OP_STORE,
   2782 			.clearValue  = {.depthStencil = {1.0f, 0}},
   2783 		};
   2784 
   2785 		VkRenderingInfo rendering_info = {
   2786 			.sType                = VK_STRUCTURE_TYPE_RENDERING_INFO,
   2787 			.renderArea           = {.offset = {0}, .extent = {colour->width, colour->height}},
   2788 			.layerCount           = 1,
   2789 			.colorAttachmentCount = 1,
   2790 			.pColorAttachments    = &colour_attachment,
   2791 			.pDepthAttachment     = &depth_stencil_attachment,
   2792 			.pStencilAttachment   = &depth_stencil_attachment,
   2793 		};
   2794 
   2795 		vkCmdBeginRendering(cmd, &rendering_info);
   2796 	}
   2797 }
   2798 
   2799 DEBUG_IMPORT void
   2800 vk_command_draw(VulkanHandle command, GPUBuffer *model)
   2801 {
   2802 	if (ValidVulkanHandle(command) && ValidVulkanHandle(model->handle)) {
   2803 		VkCommandBuffer cmd = vk_command_buffer(command);
   2804 		VulkanBuffer   *vb  = vk_entity_data(model->handle, VulkanEntityKind_RenderModel);
   2805 		vkCmdBindIndexBuffer2(cmd, vb->buffer, 0, vk_index_size(vb->index_type) * model->index_count, vb->index_type);
   2806 		vkCmdDrawIndexed(cmd, model->index_count, 1, 0, 0, 0);
   2807 	}
   2808 }
   2809 
   2810 DEBUG_IMPORT void
   2811 vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset)
   2812 {
   2813 	if ValidVulkanHandle(command) {
   2814 		VkCommandBuffer cmd = vk_command_buffer(command);
   2815 		VkRect2D scissor = {.offset = {x_offset, y_offset}, .extent = {width, height}};
   2816 		vkCmdSetScissor(cmd, 0, 1, &scissor);
   2817 	}
   2818 }
   2819 
   2820 DEBUG_IMPORT void
   2821 vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth)
   2822 {
   2823 	if ValidVulkanHandle(command) {
   2824 		VkCommandBuffer cmd = vk_command_buffer(command);
   2825 		VkViewport viewport = {x_offset, y_offset, width, height, min_depth, max_depth};
   2826 		vkCmdSetViewport(cmd, 0, 1, &viewport);
   2827 	}
   2828 }
   2829 
   2830 DEBUG_IMPORT void
   2831 vk_command_end_rendering(VulkanHandle command)
   2832 {
   2833 	if ValidVulkanHandle(command) vkCmdEndRendering(vk_command_buffer(command));
   2834 }
   2835 
   2836 DEBUG_IMPORT void
   2837 vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination,
   2838                        GPUBuffer *restrict source, u64 source_offset, i64 size)
   2839 {
   2840 	if (ValidVulkanHandle(command) && ValidVulkanHandle(destination->handle) && ValidVulkanHandle(source->handle)) {
   2841 		VkCommandBuffer cmd = vk_command_buffer(command);
   2842 		VulkanBuffer *db = vk_entity_data(destination->handle, VulkanEntityKind_Buffer);
   2843 		VulkanBuffer *sb = vk_entity_data(source->handle,      VulkanEntityKind_Buffer);
   2844 
   2845 		VkBufferCopy2 buffer_copy = {
   2846 			.sType     = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
   2847 			.srcOffset = source_offset,
   2848 			.dstOffset = 0,
   2849 			.size      = size,
   2850 		};
   2851 
   2852 		VkCopyBufferInfo2 copy_buffer_info = {
   2853 			.sType       = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
   2854 			.srcBuffer   = sb->buffer,
   2855 			.dstBuffer   = db->buffer,
   2856 			.regionCount = 1,
   2857 			.pRegions    = &buffer_copy,
   2858 		};
   2859 
   2860 		vkCmdCopyBuffer2(cmd, &copy_buffer_info);
   2861 	}
   2862 }
   2863 
   2864 DEBUG_IMPORT u64 *
   2865 vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena)
   2866 {
   2867 	u64 *result = 0;
   2868 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2869 		VulkanContext     *vk  = vulkan_context;
   2870 		VulkanCommandPool *vcp = vk->command_pools[timeline];
   2871 		DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) {
   2872 			u32 index = (vcp->next_index - 1) % countof(vcp->buffers);
   2873 			u32 count = vcp->queries_occupied[index];
   2874 			if (count > 0) {
   2875 				result = push_array(arena, u64, count + 1);
   2876 				result[0] = count;
   2877 
   2878 				vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);
   2879 
   2880 				vkGetQueryPoolResults(vk->device, vcp->query_pool, index * MaxCommandBufferTimestamps, count,
   2881 				                      count * sizeof(u64), result + 1, 8, VK_QUERY_RESULT_WAIT_BIT);
   2882 			}
   2883 		}
   2884 	} else {
   2885 		result = push_array(arena, u64, 1);
   2886 	}
   2887 	return result;
   2888 }