# Source code for simvx.graphics.renderer.ssao_pass

"""Screen-Space Ambient Occlusion (SSAO) pass — compute-based AO from depth buffer."""

import logging
from typing import Any

import numpy as np
import vulkan as vk

from ..gpu.descriptors import (
    DescriptorWriteBatch,
    allocate_descriptor_set,
    create_descriptor_set_layout,
    create_pool_for_types,
)
from ..gpu.memory import create_buffer, create_image, upload_image_data, upload_numpy
from ..gpu.pipeline_compute import create_compute_pipeline

__all__ = ["SSAOPass"]

log = logging.getLogger(__name__)

# Number of hemisphere kernel samples. Each sample is stored as one float32
# vec4 (16 bytes), so the kernel UBO is KERNEL_SIZE * 16 bytes.
# NOTE(review): presumably must match the array length declared in ssao.comp — confirm.
KERNEL_SIZE = 32
# Push constant: mat4 proj(64) + vec4 params(16) + vec4 resolution(16) = 96 bytes
# Used both as the pipeline's push-constant range size and as the byte count
# passed to vkCmdPushConstants.
_PC_SIZE = 96

class SSAOPass:
    """Compute-based SSAO: generates ambient occlusion from depth buffer.

    Pipeline: depth -> SSAO generation (compute) -> box blur (compute) -> R8 AO texture.
    Operates at half resolution for performance. The blurred AO texture can be sampled
    in the tonemap/post-process pass to darken ambient lighting in crevices.

    Kernel samples are stored in a UBO (binding 3) to stay within push constant limits.

    Lifecycle: ``setup()`` once, ``render()`` per frame, ``resize()`` when the
    target dimensions change, ``cleanup()`` to release every GPU object.
    """

    def __init__(self, engine: Any):
        self._engine = engine
        self._ready = False

        # Compute pipelines
        self._ssao_pipeline: Any = None
        self._ssao_layout: Any = None
        self._blur_pipeline: Any = None
        self._blur_layout: Any = None
        self._ssao_module: Any = None
        self._blur_module: Any = None

        # Images
        self._ao_image: Any = None
        self._ao_memory: Any = None
        self._ao_view: Any = None
        self._blur_image: Any = None
        self._blur_memory: Any = None
        self._blur_view: Any = None
        self._noise_image: Any = None
        self._noise_memory: Any = None
        self._noise_view: Any = None

        # Kernel UBO
        self._kernel_buf: Any = None
        self._kernel_mem: Any = None

        # Descriptors
        self._ssao_desc_pool: Any = None
        self._ssao_desc_layout: Any = None
        self._ssao_desc_set: Any = None
        self._blur_desc_pool: Any = None
        self._blur_desc_layout: Any = None
        self._blur_desc_set: Any = None
        self._depth_sampler: Any = None
        self._noise_sampler: Any = None

        # Kernel samples (pre-computed hemisphere); w component stays 0.
        self._kernel: np.ndarray = np.zeros((KERNEL_SIZE, 4), dtype=np.float32)

        # Depth image reference (for the pre-dispatch barrier)
        self._depth_image: Any = None

        # Full-resolution dimensions; AO images are created at half of these.
        self._width: int = 0
        self._height: int = 0

        # Public settings
        self.enabled: bool = True
        self.radius: float = 0.5
        self.bias: float = 0.025
        self.intensity: float = 1.0

    @property
    def ao_view(self) -> Any:
        """Blurred AO image view for sampling in post-process."""
        return self._blur_view

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _half_extent(width: int, height: int) -> tuple[int, int]:
        """Return the half-resolution extent, never smaller than 1x1."""
        return max(1, width // 2), max(1, height // 2)

    @staticmethod
    def _single_subresource(aspect: Any) -> Any:
        """Subresource range covering mip 0 / layer 0 of the given aspect."""
        return vk.VkImageSubresourceRange(
            aspectMask=aspect,
            baseMipLevel=0,
            levelCount=1,
            baseArrayLayer=0,
            layerCount=1,
        )

    def _create_color_view(self, image: Any, fmt: Any) -> Any:
        """Create a 2D colour image view of ``image`` with format ``fmt``."""
        return vk.vkCreateImageView(
            self._engine.ctx.device,
            vk.VkImageViewCreateInfo(
                image=image,
                viewType=vk.VK_IMAGE_VIEW_TYPE_2D,
                format=fmt,
                subresourceRange=self._single_subresource(vk.VK_IMAGE_ASPECT_COLOR_BIT),
            ),
            None,
        )

    def _create_nearest_sampler(self, address_mode: Any) -> Any:
        """Create a nearest-filter sampler using ``address_mode`` on U, V and W."""
        return vk.vkCreateSampler(
            self._engine.ctx.device,
            vk.VkSamplerCreateInfo(
                magFilter=vk.VK_FILTER_NEAREST,
                minFilter=vk.VK_FILTER_NEAREST,
                addressModeU=address_mode,
                addressModeV=address_mode,
                addressModeW=address_mode,
                anisotropyEnable=vk.VK_FALSE,
                unnormalizedCoordinates=vk.VK_FALSE,
                mipmapMode=vk.VK_SAMPLER_MIPMAP_MODE_NEAREST,
            ),
            None,
        )

    def _storage_image_barrier(self, image: Any) -> Any:
        """Shader-write -> shader-read barrier for a colour image kept in GENERAL layout."""
        return vk.VkImageMemoryBarrier(
            srcAccessMask=vk.VK_ACCESS_SHADER_WRITE_BIT,
            dstAccessMask=vk.VK_ACCESS_SHADER_READ_BIT,
            oldLayout=vk.VK_IMAGE_LAYOUT_GENERAL,
            newLayout=vk.VK_IMAGE_LAYOUT_GENERAL,
            srcQueueFamilyIndex=vk.VK_QUEUE_FAMILY_IGNORED,
            dstQueueFamilyIndex=vk.VK_QUEUE_FAMILY_IGNORED,
            image=image,
            subresourceRange=self._single_subresource(vk.VK_IMAGE_ASPECT_COLOR_BIT),
        )

    def _destroy_handles(self, device: Any, table: Any) -> None:
        """Destroy each live handle named in ``table`` and reset the attribute to None.

        ``table`` is an iterable of ``(attribute_name, destroy_function)`` pairs;
        each destroy function is called as ``destroy(device, handle, None)``.
        """
        for attr, destroy in table:
            handle = getattr(self, attr, None)
            if handle:
                destroy(device, handle, None)
            setattr(self, attr, None)

    # ------------------------------------------------------------------
    # Setup
    # ------------------------------------------------------------------

    def setup(self, width: int, height: int, depth_view: Any, depth_image: Any = None) -> None:
        """Initialize SSAO resources: noise texture, AO images, kernel, compute pipelines.

        Args:
            width: Full-resolution target width; AO images use half of it.
            height: Full-resolution target height; AO images use half of it.
            depth_view: Depth image view bound as the SSAO depth input.
            depth_image: Depth image handle. When given, ``render()`` records a
                depth-write -> compute-read barrier on it.

        Raises:
            Exception: Whatever a creation step raises. Resources created before
                the failure are released before the exception propagates.
        """
        self._width = width
        self._height = height
        self._depth_image = depth_image

        try:
            self._generate_kernel()
            self._create_kernel_ubo()
            self._create_noise_texture()
            self._create_ao_images(width, height)
            self._create_samplers()
            self._create_ssao_descriptors(depth_view)
            self._create_blur_descriptors()
            self._create_pipelines()
        except Exception:
            # cleanup() is a no-op until _ready is set, so a half-built pass
            # would otherwise leak every object created so far.
            self._release_resources()
            raise

        self._ready = True
        half_w, half_h = self._half_extent(width, height)
        log.debug("SSAO pass initialized (%dx%d half-res, %d kernel samples)", half_w, half_h, KERNEL_SIZE)

    def _generate_kernel(self) -> None:
        """Generate hemisphere kernel samples with accelerating distribution."""
        rng = np.random.default_rng(42)  # fixed seed: same kernel every run
        for i in range(KERNEL_SIZE):
            sample = np.array(
                [
                    rng.uniform(-1.0, 1.0),
                    rng.uniform(-1.0, 1.0),
                    rng.uniform(0.0, 1.0),  # z >= 0 keeps samples in the +Z hemisphere
                ],
                dtype=np.float32,
            )
            sample /= np.linalg.norm(sample)
            sample *= rng.uniform(0.0, 1.0)
            # Accelerate distribution: more samples close to origin
            scale = i / KERNEL_SIZE
            scale = 0.1 + scale * scale * 0.9
            sample *= scale
            self._kernel[i, :3] = sample

    def _create_kernel_ubo(self) -> None:
        """Create UBO for hemisphere kernel samples (32 * vec4 = 512 bytes)."""
        e = self._engine
        ubo_size = KERNEL_SIZE * 16  # one vec4 (16 bytes) per sample
        self._kernel_buf, self._kernel_mem = create_buffer(
            e.ctx.device,
            e.ctx.physical_device,
            ubo_size,
            vk.VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
            vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        )
        upload_numpy(e.ctx.device, self._kernel_mem, self._kernel)

    def _create_noise_texture(self) -> None:
        """Create 4x4 noise texture with random tangent-space rotations."""
        e = self._engine
        rng = np.random.default_rng(7)  # fixed seed: same noise every run
        noise_data = np.zeros((4, 4, 4), dtype=np.uint8)
        for y in range(4):
            for x in range(4):
                rx = rng.uniform(-1.0, 1.0)
                ry = rng.uniform(-1.0, 1.0)
                # Normalise to a unit 2D vector; clamp avoids division by zero.
                length = max(np.sqrt(rx * rx + ry * ry), 1e-6)
                rx /= length
                ry /= length
                # Remap [-1, 1] -> [0, 255] for UNORM storage.
                noise_data[y, x, 0] = int((rx * 0.5 + 0.5) * 255)
                noise_data[y, x, 1] = int((ry * 0.5 + 0.5) * 255)
                noise_data[y, x, 2] = 0
                noise_data[y, x, 3] = 255

        self._noise_image, self._noise_memory = upload_image_data(
            e.ctx.device,
            e.ctx.physical_device,
            e.ctx.graphics_queue,
            e.ctx.command_pool,
            np.ascontiguousarray(noise_data),
            4,
            4,
            vk.VK_FORMAT_R8G8B8A8_UNORM,
        )
        self._noise_view = self._create_color_view(self._noise_image, vk.VK_FORMAT_R8G8B8A8_UNORM)

    def _create_ao_images(self, width: int, height: int) -> None:
        """Create half-res R8 images for raw AO and blurred AO."""
        from ..gpu.memory import transition_image_layout

        e = self._engine
        hw, hh = self._half_extent(width, height)

        for prefix in ("_ao", "_blur"):
            image, memory = create_image(
                e.ctx.device,
                e.ctx.physical_device,
                hw,
                hh,
                vk.VK_FORMAT_R8_UNORM,
                vk.VK_IMAGE_USAGE_STORAGE_BIT | vk.VK_IMAGE_USAGE_SAMPLED_BIT,
            )
            # Record ownership immediately so a failing view creation cannot
            # strand the image and its memory.
            setattr(self, f"{prefix}_image", image)
            setattr(self, f"{prefix}_memory", memory)
            setattr(self, f"{prefix}_view", self._create_color_view(image, vk.VK_FORMAT_R8_UNORM))

        # Transition both to GENERAL for compute storage
        for img in (self._ao_image, self._blur_image):
            transition_image_layout(
                e.ctx.device,
                e.ctx.graphics_queue,
                e.ctx.command_pool,
                img,
                vk.VK_IMAGE_LAYOUT_UNDEFINED,
                vk.VK_IMAGE_LAYOUT_GENERAL,
            )

    def _create_samplers(self) -> None:
        """Create samplers for depth (clamp-to-edge) and noise (repeat) textures."""
        self._depth_sampler = self._create_nearest_sampler(vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE)
        self._noise_sampler = self._create_nearest_sampler(vk.VK_SAMPLER_ADDRESS_MODE_REPEAT)

    def _create_ssao_descriptors(self, depth_view: Any) -> None:
        """Create descriptor set: depth + noise (samplers), AO output (storage), kernel (UBO)."""
        device = self._engine.ctx.device
        cs = vk.VK_SHADER_STAGE_COMPUTE_BIT
        self._ssao_desc_layout = create_descriptor_set_layout(device, [
            (0, vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, cs, 1),
            (1, vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, cs, 1),
            (2, vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, cs, 1),
            (3, vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, cs, 1),
        ])
        self._ssao_desc_pool = create_pool_for_types(device, {
            vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 2,
            vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 1,
            vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 1,
        })
        self._ssao_desc_set = allocate_descriptor_set(device, self._ssao_desc_pool, self._ssao_desc_layout)
        self._write_ssao_descriptors(depth_view)

    def _write_ssao_descriptors(self, depth_view: Any) -> None:
        """Write depth, noise, AO output, and kernel UBO to SSAO descriptor set."""
        with DescriptorWriteBatch(self._engine.ctx.device) as batch:
            batch.image(
                self._ssao_desc_set,
                0,
                depth_view,
                self._depth_sampler,
                image_layout=vk.VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
            )
            batch.image(self._ssao_desc_set, 1, self._noise_view, self._noise_sampler)
            batch.storage_image(self._ssao_desc_set, 2, self._ao_view)
            batch.uniform_buffer(self._ssao_desc_set, 3, self._kernel_buf, KERNEL_SIZE * 16)

    def _create_blur_descriptors(self) -> None:
        """Create descriptor set for blur compute: AO input (storage) + blur output (storage)."""
        device = self._engine.ctx.device
        cs = vk.VK_SHADER_STAGE_COMPUTE_BIT
        self._blur_desc_layout = create_descriptor_set_layout(device, [
            (0, vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, cs, 1),
            (1, vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, cs, 1),
        ])
        self._blur_desc_pool = create_pool_for_types(
            device,
            {vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 2},
        )
        self._blur_desc_set = allocate_descriptor_set(device, self._blur_desc_pool, self._blur_desc_layout)
        self._write_blur_descriptors()

    def _write_blur_descriptors(self) -> None:
        """Write AO input and blur output to blur descriptor set."""
        with DescriptorWriteBatch(self._engine.ctx.device) as batch:
            batch.storage_image(self._blur_desc_set, 0, self._ao_view)
            batch.storage_image(self._blur_desc_set, 1, self._blur_view)

    def _create_pipelines(self) -> None:
        """Create SSAO and blur compute pipelines."""
        e = self._engine
        device = e.ctx.device
        shader_dir = e.shader_dir
        self._ssao_pipeline, self._ssao_layout, self._ssao_module = create_compute_pipeline(
            device,
            shader_dir / "ssao.comp",
            [self._ssao_desc_layout],
            _PC_SIZE,
        )
        # The blur shader takes no push constants.
        self._blur_pipeline, self._blur_layout, self._blur_module = create_compute_pipeline(
            device,
            shader_dir / "ssao_blur.comp",
            [self._blur_desc_layout],
            0,
        )

    # ------------------------------------------------------------------
    # Per-frame
    # ------------------------------------------------------------------

    def render(self, cmd: Any, proj_matrix: np.ndarray) -> None:
        """Dispatch SSAO compute + blur. Call between HDR pass end and tonemap.

        Does nothing when the pass has not been set up or ``enabled`` is False.

        Args:
            cmd: Active command buffer (outside any render pass).
            proj_matrix: Camera projection matrix (row-major numpy, transposed for GPU).

        Raises:
            ValueError: If the packed push-constant data is not ``_PC_SIZE`` bytes
                (i.e. ``proj_matrix`` does not hold exactly 16 values).
        """
        if not self._ready or not self.enabled:
            return

        ffi = vk.ffi
        hw, hh = self._half_extent(self._width, self._height)
        # One workgroup per 8x8 tile, rounded up to cover the whole image.
        groups_x = (hw + 7) // 8
        groups_y = (hh + 7) // 8

        # Push constants: mat4 proj(64) + vec4 params(16) + vec4 resolution(16) = 96 bytes
        proj_t = np.ascontiguousarray(proj_matrix.T, dtype=np.float32)
        params = np.array([self.radius, self.bias, self.intensity, 0.0], dtype=np.float32)
        resolution = np.array([float(hw), float(hh), 1.0 / hw, 1.0 / hh], dtype=np.float32)
        pc_data = proj_t.tobytes() + params.tobytes() + resolution.tobytes()
        # vkCmdPushConstants is told to read _PC_SIZE bytes; validate before any
        # command is recorded so a wrong-shaped matrix cannot cause an
        # out-of-bounds read or leave a half-recorded command buffer.
        if len(pc_data) != _PC_SIZE:
            raise ValueError(
                f"proj_matrix must be a 4x4 matrix; packed push constants are "
                f"{len(pc_data)} bytes, expected {_PC_SIZE}"
            )

        # Ensure depth writes are visible before compute reads.
        # The HDR render pass already transitions depth to READ_ONLY_OPTIMAL
        # (samplable_depth=True), so oldLayout matches the current layout.
        if self._depth_image:
            depth_barrier = vk.VkImageMemoryBarrier(
                srcAccessMask=vk.VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
                dstAccessMask=vk.VK_ACCESS_SHADER_READ_BIT,
                oldLayout=vk.VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
                newLayout=vk.VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
                srcQueueFamilyIndex=vk.VK_QUEUE_FAMILY_IGNORED,
                dstQueueFamilyIndex=vk.VK_QUEUE_FAMILY_IGNORED,
                image=self._depth_image,
                subresourceRange=self._single_subresource(vk.VK_IMAGE_ASPECT_DEPTH_BIT),
            )
            vk.vkCmdPipelineBarrier(
                cmd,
                # Depth attachment writes may be performed in either fragment
                # test stage, so both must be in the source scope.
                vk.VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | vk.VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
                vk.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                0,
                0,
                None,
                0,
                None,
                1,
                [depth_barrier],
            )

        # --- SSAO generation ---
        vk.vkCmdBindPipeline(cmd, vk.VK_PIPELINE_BIND_POINT_COMPUTE, self._ssao_pipeline)
        vk.vkCmdBindDescriptorSets(
            cmd,
            vk.VK_PIPELINE_BIND_POINT_COMPUTE,
            self._ssao_layout,
            0,
            1,
            [self._ssao_desc_set],
            0,
            None,
        )
        cbuf = ffi.new("char[]", pc_data)
        vk._vulkan.lib.vkCmdPushConstants(
            cmd,
            self._ssao_layout,
            vk.VK_SHADER_STAGE_COMPUTE_BIT,
            0,
            _PC_SIZE,
            cbuf,
        )
        vk.vkCmdDispatch(cmd, groups_x, groups_y, 1)

        # Barrier: SSAO write -> blur read
        vk.vkCmdPipelineBarrier(
            cmd,
            vk.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            vk.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            0,
            0,
            None,
            0,
            None,
            1,
            [self._storage_image_barrier(self._ao_image)],
        )

        # --- Blur ---
        vk.vkCmdBindPipeline(cmd, vk.VK_PIPELINE_BIND_POINT_COMPUTE, self._blur_pipeline)
        vk.vkCmdBindDescriptorSets(
            cmd,
            vk.VK_PIPELINE_BIND_POINT_COMPUTE,
            self._blur_layout,
            0,
            1,
            [self._blur_desc_set],
            0,
            None,
        )
        vk.vkCmdDispatch(cmd, groups_x, groups_y, 1)

        # Barrier: blur write -> fragment shader read (tonemap sampling)
        vk.vkCmdPipelineBarrier(
            cmd,
            vk.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
            0,
            0,
            None,
            0,
            None,
            1,
            [self._storage_image_barrier(self._blur_image)],
        )

    def resize(self, width: int, height: int, depth_view: Any, depth_image: Any = None) -> None:
        """Recreate AO images for new dimensions.

        Does nothing when the pass has not been set up.

        Args:
            width: New full-resolution width.
            height: New full-resolution height.
            depth_view: Depth image view to rebind as the SSAO depth input.
            depth_image: New depth image handle; the previous one is kept when None.
        """
        if not self._ready:
            return
        self._width = width
        self._height = height
        if depth_image is not None:
            self._depth_image = depth_image
        self._destroy_ao_images()
        self._create_ao_images(width, height)
        # Both descriptor sets reference the recreated views and must be rewritten.
        self._write_ssao_descriptors(depth_view)
        self._write_blur_descriptors()

    # ------------------------------------------------------------------
    # Teardown
    # ------------------------------------------------------------------

    def _destroy_ao_images(self) -> None:
        """Destroy AO and blur images/views/memory."""
        device = self._engine.ctx.device
        for prefix in ("_ao", "_blur"):
            # View first, then the image it refers to, then the backing memory.
            self._destroy_handles(device, (
                (f"{prefix}_view", vk.vkDestroyImageView),
                (f"{prefix}_image", vk.vkDestroyImage),
                (f"{prefix}_memory", vk.vkFreeMemory),
            ))

    def _release_resources(self) -> None:
        """Destroy every GPU object this pass currently owns and clear the handles.

        Safe on a partially initialised pass: attributes that are still None
        are skipped.
        """
        device = self._engine.ctx.device
        self._destroy_handles(device, (
            ("_ssao_pipeline", vk.vkDestroyPipeline),
            ("_ssao_layout", vk.vkDestroyPipelineLayout),
            ("_blur_pipeline", vk.vkDestroyPipeline),
            ("_blur_layout", vk.vkDestroyPipelineLayout),
            ("_ssao_module", vk.vkDestroyShaderModule),
            ("_blur_module", vk.vkDestroyShaderModule),
            ("_ssao_desc_pool", vk.vkDestroyDescriptorPool),
            ("_blur_desc_pool", vk.vkDestroyDescriptorPool),
            ("_ssao_desc_layout", vk.vkDestroyDescriptorSetLayout),
            ("_blur_desc_layout", vk.vkDestroyDescriptorSetLayout),
            ("_depth_sampler", vk.vkDestroySampler),
            ("_noise_sampler", vk.vkDestroySampler),
        ))
        # Descriptor sets are not destroyed individually; their handles are
        # dropped here because the pools they came from are gone.
        self._ssao_desc_set = None
        self._blur_desc_set = None

        self._destroy_ao_images()
        self._destroy_handles(device, (
            ("_noise_view", vk.vkDestroyImageView),
            ("_noise_image", vk.vkDestroyImage),
            ("_noise_memory", vk.vkFreeMemory),
            ("_kernel_buf", vk.vkDestroyBuffer),
            ("_kernel_mem", vk.vkFreeMemory),
        ))
        self._ready = False

    def cleanup(self) -> None:
        """Release all GPU resources. No-op if the pass was never set up."""
        if not self._ready:
            return
        self._release_resources()