"""Screen-Space Ambient Occlusion (SSAO) pass — compute-based AO from depth buffer."""
import logging
from typing import Any
import numpy as np
import vulkan as vk
from ..gpu.descriptors import (
DescriptorWriteBatch,
allocate_descriptor_set,
create_descriptor_set_layout,
create_pool_for_types,
)
from ..gpu.memory import create_buffer, create_image, upload_image_data, upload_numpy
from ..gpu.pipeline_compute import create_compute_pipeline
# Public API of this module.
__all__ = ["SSAOPass"]
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Number of hemisphere kernel samples. Each sample occupies one float32 vec4
# (16 bytes) in the kernel uniform buffer, so the UBO is KERNEL_SIZE * 16 bytes.
KERNEL_SIZE = 32
# Push constant: mat4 proj(64) + vec4 params(16) + vec4 resolution(16) = 96 bytes
_PC_SIZE = 96
# (Stray "[docs]" source-link marker from the HTML listing removed; it is not part of the module.)
class SSAOPass:
    """Compute-based SSAO: generates ambient occlusion from depth buffer.

    Pipeline: depth -> SSAO generation (compute) -> box blur (compute) -> R8 AO texture.

    Operates at half resolution for performance. The blurred AO texture can be sampled
    in the tonemap/post-process pass to darken ambient lighting in crevices.

    Kernel samples are stored in a UBO (binding 3) to stay within push constant limits.

    Attributes:
        enabled: When False, :meth:`render` records no commands.
        radius: Forwarded to the SSAO shader as ``params.x``.
        bias: Forwarded to the SSAO shader as ``params.y``.
        intensity: Forwarded to the SSAO shader as ``params.z``.
    """

    def __init__(self, engine: Any):
        """Store the engine reference and initialise every handle to ``None``.

        No GPU resources are created here; call :meth:`setup` for that.

        Args:
            engine: Engine object; this class reads ``engine.ctx`` (device,
                physical device, graphics queue, command pool) and
                ``engine.shader_dir`` from it.
        """
        self._engine = engine
        self._ready = False
        # Compute pipelines
        self._ssao_pipeline: Any = None
        self._ssao_layout: Any = None
        self._blur_pipeline: Any = None
        self._blur_layout: Any = None
        self._ssao_module: Any = None
        self._blur_module: Any = None
        # Images
        self._ao_image: Any = None
        self._ao_memory: Any = None
        self._ao_view: Any = None
        self._blur_image: Any = None
        self._blur_memory: Any = None
        self._blur_view: Any = None
        self._noise_image: Any = None
        self._noise_memory: Any = None
        self._noise_view: Any = None
        # Kernel UBO
        self._kernel_buf: Any = None
        self._kernel_mem: Any = None
        # Descriptors
        self._ssao_desc_pool: Any = None
        self._ssao_desc_layout: Any = None
        self._ssao_desc_set: Any = None
        self._blur_desc_pool: Any = None
        self._blur_desc_layout: Any = None
        self._blur_desc_set: Any = None
        self._depth_sampler: Any = None
        self._noise_sampler: Any = None
        # Kernel samples (pre-computed hemisphere); the 4th component stays 0.
        self._kernel: np.ndarray = np.zeros((KERNEL_SIZE, 4), dtype=np.float32)
        # Depth image reference (for the depth barrier in render); not owned by this pass.
        self._depth_image: Any = None
        # Full-resolution dimensions; AO images are allocated at half of these.
        self._width: int = 0
        self._height: int = 0
        # Public settings
        self.enabled: bool = True
        self.radius: float = 0.5
        self.bias: float = 0.025
        self.intensity: float = 1.0

    @property
    def ao_view(self) -> Any:
        """Blurred AO image view for sampling in post-process."""
        return self._blur_view

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _half_res(width: int, height: int) -> tuple:
        """Return the ``(width, height)`` of the half-resolution AO target, each at least 1."""
        return max(1, width // 2), max(1, height // 2)

    @staticmethod
    def _single_subresource(aspect: Any) -> Any:
        """Return a subresource range covering mip 0 / layer 0 of the given aspect."""
        return vk.VkImageSubresourceRange(
            aspectMask=aspect,
            baseMipLevel=0,
            levelCount=1,
            baseArrayLayer=0,
            layerCount=1,
        )

    def _create_color_view(self, image: Any, fmt: Any) -> Any:
        """Create a 2D colour image view of ``image`` with format ``fmt``."""
        return vk.vkCreateImageView(
            self._engine.ctx.device,
            vk.VkImageViewCreateInfo(
                image=image,
                viewType=vk.VK_IMAGE_VIEW_TYPE_2D,
                format=fmt,
                subresourceRange=self._single_subresource(vk.VK_IMAGE_ASPECT_COLOR_BIT),
            ),
            None,
        )

    def _create_nearest_sampler(self, address_mode: Any) -> Any:
        """Create a nearest-filtered sampler using ``address_mode`` on all three axes."""
        return vk.vkCreateSampler(
            self._engine.ctx.device,
            vk.VkSamplerCreateInfo(
                magFilter=vk.VK_FILTER_NEAREST,
                minFilter=vk.VK_FILTER_NEAREST,
                addressModeU=address_mode,
                addressModeV=address_mode,
                addressModeW=address_mode,
                anisotropyEnable=vk.VK_FALSE,
                unnormalizedCoordinates=vk.VK_FALSE,
                mipmapMode=vk.VK_SAMPLER_MIPMAP_MODE_NEAREST,
            ),
            None,
        )

    def _record_image_barrier(
        self,
        cmd: Any,
        image: Any,
        *,
        aspect: Any,
        layout: Any,
        src_access: Any,
        dst_access: Any,
        src_stage: Any,
        dst_stage: Any,
    ) -> None:
        """Record a single image memory barrier that keeps the image in ``layout``."""
        barrier = vk.VkImageMemoryBarrier(
            srcAccessMask=src_access,
            dstAccessMask=dst_access,
            oldLayout=layout,
            newLayout=layout,
            srcQueueFamilyIndex=vk.VK_QUEUE_FAMILY_IGNORED,
            dstQueueFamilyIndex=vk.VK_QUEUE_FAMILY_IGNORED,
            image=image,
            subresourceRange=self._single_subresource(aspect),
        )
        vk.vkCmdPipelineBarrier(
            cmd,
            src_stage,
            dst_stage,
            0,
            0,
            None,
            0,
            None,
            1,
            [barrier],
        )

    def _pack_push_constants(self, proj_matrix: Any, half_w: int, half_h: int) -> bytes:
        """Pack the push-constant block: mat4 proj(64) + vec4 params(16) + vec4 resolution(16).

        Raises:
            ValueError: If ``proj_matrix`` does not hold exactly 16 elements.
        """
        proj_t = np.ascontiguousarray(np.asarray(proj_matrix).T, dtype=np.float32)
        if proj_t.size != 16:
            # A short payload would make vkCmdPushConstants read past the buffer.
            raise ValueError(
                f"proj_matrix must be a 4x4 matrix (16 elements), got shape {proj_t.shape}"
            )
        params = np.array([self.radius, self.bias, self.intensity, 0.0], dtype=np.float32)
        resolution = np.array(
            [float(half_w), float(half_h), 1.0 / half_w, 1.0 / half_h],
            dtype=np.float32,
        )
        return proj_t.tobytes() + params.tobytes() + resolution.tobytes()

    def _release(self, attr: str, destroy: Any) -> None:
        """Destroy the handle stored in ``attr`` (if any) and reset the attribute to ``None``."""
        handle = getattr(self, attr, None)
        if handle:
            # The device is only looked up when there is something to destroy.
            destroy(self._engine.ctx.device, handle, None)
        setattr(self, attr, None)

    # ------------------------------------------------------------------
    # Setup
    # ------------------------------------------------------------------

    def setup(self, width: int, height: int, depth_view: Any, depth_image: Any = None) -> None:
        """Initialize SSAO resources: noise texture, AO images, kernel, compute pipelines.

        Args:
            width: Full-resolution width; AO images are created at half of it.
            height: Full-resolution height; AO images are created at half of it.
            depth_view: Depth image view bound at binding 0 of the SSAO descriptor set.
            depth_image: Optional depth image; when given, :meth:`render` records a
                depth barrier against it before the SSAO dispatch.
        """
        self._width = width
        self._height = height
        self._depth_image = depth_image
        self._generate_kernel()
        self._create_kernel_ubo()
        self._create_noise_texture()
        self._create_ao_images(width, height)
        self._create_samplers()
        self._create_ssao_descriptors(depth_view)
        self._create_blur_descriptors()
        self._create_pipelines()
        self._ready = True
        # Report the dimensions actually allocated (half-res), matching the message text.
        half_w, half_h = self._half_res(width, height)
        log.debug(
            "SSAO pass initialized (%dx%d half-res, %d kernel samples)",
            half_w,
            half_h,
            KERNEL_SIZE,
        )

    def _generate_kernel(self) -> None:
        """Generate hemisphere kernel samples with accelerating distribution.

        Uses a fixed seed, so the kernel is identical on every run.
        """
        rng = np.random.default_rng(42)
        for i in range(KERNEL_SIZE):
            sample = np.array(
                [
                    rng.uniform(-1.0, 1.0),
                    rng.uniform(-1.0, 1.0),
                    rng.uniform(0.0, 1.0),
                ],
                dtype=np.float32,
            )
            # Guard against a zero-length draw, as the noise generator does.
            sample /= max(np.linalg.norm(sample), 1e-6)
            sample *= rng.uniform(0.0, 1.0)
            # Accelerate distribution: more samples close to origin
            t = i / KERNEL_SIZE
            sample *= 0.1 + t * t * 0.9
            self._kernel[i, :3] = sample

    def _create_kernel_ubo(self) -> None:
        """Create UBO for hemisphere kernel samples (32 * vec4 = 512 bytes)."""
        ctx = self._engine.ctx
        ubo_size = KERNEL_SIZE * 16  # one float32 vec4 (16 bytes) per sample
        self._kernel_buf, self._kernel_mem = create_buffer(
            ctx.device,
            ctx.physical_device,
            ubo_size,
            vk.VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
            vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        )
        upload_numpy(ctx.device, self._kernel_mem, self._kernel)

    def _create_noise_texture(self) -> None:
        """Create 4x4 noise texture with random tangent-space rotations.

        Each texel stores a unit 2D vector remapped from [-1, 1] to [0, 255] in R and G;
        B is 0 and A is 255. Uses a fixed seed, so the texture is identical on every run.
        """
        ctx = self._engine.ctx
        rng = np.random.default_rng(7)
        noise_data = np.zeros((4, 4, 4), dtype=np.uint8)
        for y in range(4):
            for x in range(4):
                rx = rng.uniform(-1.0, 1.0)
                ry = rng.uniform(-1.0, 1.0)
                length = max(np.sqrt(rx * rx + ry * ry), 1e-6)
                rx /= length
                ry /= length
                noise_data[y, x] = (
                    int((rx * 0.5 + 0.5) * 255),
                    int((ry * 0.5 + 0.5) * 255),
                    0,
                    255,
                )
        self._noise_image, self._noise_memory = upload_image_data(
            ctx.device,
            ctx.physical_device,
            ctx.graphics_queue,
            ctx.command_pool,
            np.ascontiguousarray(noise_data),
            4,
            4,
            vk.VK_FORMAT_R8G8B8A8_UNORM,
        )
        self._noise_view = self._create_color_view(self._noise_image, vk.VK_FORMAT_R8G8B8A8_UNORM)

    def _create_ao_images(self, width: int, height: int) -> None:
        """Create half-res R8 images for raw AO and blurred AO.

        Args:
            width: Full-resolution width.
            height: Full-resolution height.
        """
        from ..gpu.memory import transition_image_layout

        ctx = self._engine.ctx
        half_w, half_h = self._half_res(width, height)
        for prefix in ("_ao", "_blur"):
            image, memory = create_image(
                ctx.device,
                ctx.physical_device,
                half_w,
                half_h,
                vk.VK_FORMAT_R8_UNORM,
                vk.VK_IMAGE_USAGE_STORAGE_BIT | vk.VK_IMAGE_USAGE_SAMPLED_BIT,
            )
            # Record ownership immediately so a failing view creation cannot orphan them.
            setattr(self, f"{prefix}_image", image)
            setattr(self, f"{prefix}_memory", memory)
            setattr(self, f"{prefix}_view", self._create_color_view(image, vk.VK_FORMAT_R8_UNORM))
        # Transition both to GENERAL for compute storage
        for image in (self._ao_image, self._blur_image):
            transition_image_layout(
                ctx.device,
                ctx.graphics_queue,
                ctx.command_pool,
                image,
                vk.VK_IMAGE_LAYOUT_UNDEFINED,
                vk.VK_IMAGE_LAYOUT_GENERAL,
            )

    def _create_samplers(self) -> None:
        """Create samplers for depth (clamp-to-edge) and noise (repeat) textures."""
        self._depth_sampler = self._create_nearest_sampler(vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE)
        self._noise_sampler = self._create_nearest_sampler(vk.VK_SAMPLER_ADDRESS_MODE_REPEAT)

    def _create_ssao_descriptors(self, depth_view: Any) -> None:
        """Create descriptor set: depth + noise (samplers), AO output (storage), kernel (UBO)."""
        device = self._engine.ctx.device
        cs = vk.VK_SHADER_STAGE_COMPUTE_BIT
        self._ssao_desc_layout = create_descriptor_set_layout(device, [
            (0, vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, cs, 1),
            (1, vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, cs, 1),
            (2, vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, cs, 1),
            (3, vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, cs, 1),
        ])
        self._ssao_desc_pool = create_pool_for_types(device, {
            vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 2,
            vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 1,
            vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 1,
        })
        self._ssao_desc_set = allocate_descriptor_set(
            device, self._ssao_desc_pool, self._ssao_desc_layout,
        )
        self._write_ssao_descriptors(depth_view)

    def _write_ssao_descriptors(self, depth_view: Any) -> None:
        """Write depth, noise, AO output, and kernel UBO to SSAO descriptor set."""
        with DescriptorWriteBatch(self._engine.ctx.device) as batch:
            batch.image(
                self._ssao_desc_set, 0, depth_view, self._depth_sampler,
                image_layout=vk.VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
            )
            batch.image(self._ssao_desc_set, 1, self._noise_view, self._noise_sampler)
            batch.storage_image(self._ssao_desc_set, 2, self._ao_view)
            batch.uniform_buffer(self._ssao_desc_set, 3, self._kernel_buf, KERNEL_SIZE * 16)

    def _create_blur_descriptors(self) -> None:
        """Create descriptor set for blur compute: AO input (storage) + blur output (storage)."""
        device = self._engine.ctx.device
        cs = vk.VK_SHADER_STAGE_COMPUTE_BIT
        self._blur_desc_layout = create_descriptor_set_layout(device, [
            (0, vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, cs, 1),
            (1, vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, cs, 1),
        ])
        self._blur_desc_pool = create_pool_for_types(
            device, {vk.VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 2},
        )
        self._blur_desc_set = allocate_descriptor_set(
            device, self._blur_desc_pool, self._blur_desc_layout,
        )
        self._write_blur_descriptors()

    def _write_blur_descriptors(self) -> None:
        """Write AO input and blur output to blur descriptor set."""
        with DescriptorWriteBatch(self._engine.ctx.device) as batch:
            batch.storage_image(self._blur_desc_set, 0, self._ao_view)
            batch.storage_image(self._blur_desc_set, 1, self._blur_view)

    def _create_pipelines(self) -> None:
        """Create SSAO and blur compute pipelines."""
        engine = self._engine
        device = engine.ctx.device
        shader_dir = engine.shader_dir
        self._ssao_pipeline, self._ssao_layout, self._ssao_module = create_compute_pipeline(
            device, shader_dir / "ssao.comp", [self._ssao_desc_layout], _PC_SIZE,
        )
        self._blur_pipeline, self._blur_layout, self._blur_module = create_compute_pipeline(
            device, shader_dir / "ssao_blur.comp", [self._blur_desc_layout], 0,
        )

    # ------------------------------------------------------------------
    # Per-frame recording
    # ------------------------------------------------------------------

    def render(self, cmd: Any, proj_matrix: np.ndarray) -> None:
        """Dispatch SSAO compute + blur. Call between HDR pass end and tonemap.

        Does nothing when the pass has not been set up or ``enabled`` is False.

        Args:
            cmd: Active command buffer (outside any render pass).
            proj_matrix: Camera projection matrix (row-major numpy, transposed for GPU).

        Raises:
            ValueError: If ``proj_matrix`` does not hold exactly 16 elements. Raised
                before any command is recorded.
        """
        if not self._ready or not self.enabled:
            return

        half_w, half_h = self._half_res(self._width, self._height)
        # Build and validate the payload first so a bad matrix cannot leave the
        # command buffer half-recorded.
        pc_data = self._pack_push_constants(proj_matrix, half_w, half_h)

        ffi = vk.ffi
        # One workgroup per 8x8 tile, rounded up.
        groups_x = (half_w + 7) // 8
        groups_y = (half_h + 7) // 8
        compute = vk.VK_PIPELINE_BIND_POINT_COMPUTE

        # Ensure depth writes are visible before compute reads.
        # The HDR render pass already transitions depth to READ_ONLY_OPTIMAL
        # (samplable_depth=True), so oldLayout matches the current layout.
        if self._depth_image:
            self._record_image_barrier(
                cmd,
                self._depth_image,
                aspect=vk.VK_IMAGE_ASPECT_DEPTH_BIT,
                layout=vk.VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
                src_access=vk.VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
                dst_access=vk.VK_ACCESS_SHADER_READ_BIT,
                # Depth can be written in either fragment-test stage; name both so
                # early-test writes are inside the barrier's access scope too.
                src_stage=(
                    vk.VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
                    | vk.VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
                ),
                dst_stage=vk.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            )

        # --- SSAO generation ---
        vk.vkCmdBindPipeline(cmd, compute, self._ssao_pipeline)
        vk.vkCmdBindDescriptorSets(
            cmd,
            compute,
            self._ssao_layout,
            0,
            1,
            [self._ssao_desc_set],
            0,
            None,
        )
        # Push constants: mat4 proj(64) + vec4 params(16) + vec4 resolution(16) = 96 bytes
        cbuf = ffi.new("char[]", pc_data)
        vk._vulkan.lib.vkCmdPushConstants(
            cmd,
            self._ssao_layout,
            vk.VK_SHADER_STAGE_COMPUTE_BIT,
            0,
            _PC_SIZE,
            cbuf,
        )
        vk.vkCmdDispatch(cmd, groups_x, groups_y, 1)

        # Barrier: SSAO write -> blur read
        self._record_image_barrier(
            cmd,
            self._ao_image,
            aspect=vk.VK_IMAGE_ASPECT_COLOR_BIT,
            layout=vk.VK_IMAGE_LAYOUT_GENERAL,
            src_access=vk.VK_ACCESS_SHADER_WRITE_BIT,
            dst_access=vk.VK_ACCESS_SHADER_READ_BIT,
            src_stage=vk.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            dst_stage=vk.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
        )

        # --- Blur ---
        vk.vkCmdBindPipeline(cmd, compute, self._blur_pipeline)
        vk.vkCmdBindDescriptorSets(
            cmd,
            compute,
            self._blur_layout,
            0,
            1,
            [self._blur_desc_set],
            0,
            None,
        )
        vk.vkCmdDispatch(cmd, groups_x, groups_y, 1)

        # Barrier: blur write -> fragment shader read (tonemap sampling)
        self._record_image_barrier(
            cmd,
            self._blur_image,
            aspect=vk.VK_IMAGE_ASPECT_COLOR_BIT,
            layout=vk.VK_IMAGE_LAYOUT_GENERAL,
            src_access=vk.VK_ACCESS_SHADER_WRITE_BIT,
            dst_access=vk.VK_ACCESS_SHADER_READ_BIT,
            src_stage=vk.VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            dst_stage=vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        )

    # ------------------------------------------------------------------
    # Resize / teardown
    # ------------------------------------------------------------------

    def resize(self, width: int, height: int, depth_view: Any, depth_image: Any = None) -> None:
        """Recreate AO images for new dimensions and rebind the descriptors.

        Does nothing when the pass has not been set up.

        Args:
            width: New full-resolution width.
            height: New full-resolution height.
            depth_view: Depth image view to bind at binding 0 of the SSAO set.
            depth_image: New depth image; when ``None`` the previously stored
                depth image reference is kept.
        """
        if not self._ready:
            return
        self._width = width
        self._height = height
        if depth_image is not None:
            self._depth_image = depth_image
        self._destroy_ao_images()
        self._create_ao_images(width, height)
        self._write_ssao_descriptors(depth_view)
        self._write_blur_descriptors()

    def _destroy_ao_images(self) -> None:
        """Destroy AO and blur images/views/memory and reset their handles to ``None``."""
        for prefix in ("_ao", "_blur"):
            # View first, then the image it refers to, then the backing memory.
            self._release(f"{prefix}_view", vk.vkDestroyImageView)
            self._release(f"{prefix}_image", vk.vkDestroyImage)
            self._release(f"{prefix}_memory", vk.vkFreeMemory)

    def cleanup(self) -> None:
        """Release all GPU resources.

        Every handle that is currently set is destroyed and reset to ``None``, so
        this also reclaims resources left behind by a :meth:`setup` that raised
        partway through, and calling it more than once is harmless.
        """
        for attr, destroy in (
            ("_ssao_pipeline", vk.vkDestroyPipeline),
            ("_ssao_layout", vk.vkDestroyPipelineLayout),
            ("_blur_pipeline", vk.vkDestroyPipeline),
            ("_blur_layout", vk.vkDestroyPipelineLayout),
            ("_ssao_module", vk.vkDestroyShaderModule),
            ("_blur_module", vk.vkDestroyShaderModule),
            ("_ssao_desc_pool", vk.vkDestroyDescriptorPool),
            ("_blur_desc_pool", vk.vkDestroyDescriptorPool),
            ("_ssao_desc_layout", vk.vkDestroyDescriptorSetLayout),
            ("_blur_desc_layout", vk.vkDestroyDescriptorSetLayout),
            ("_depth_sampler", vk.vkDestroySampler),
            ("_noise_sampler", vk.vkDestroySampler),
        ):
            self._release(attr, destroy)
        # Descriptor sets are not destroyed individually; only their pools are.
        self._ssao_desc_set = None
        self._blur_desc_set = None
        self._destroy_ao_images()
        for attr, destroy in (
            ("_noise_view", vk.vkDestroyImageView),
            ("_noise_image", vk.vkDestroyImage),
            ("_noise_memory", vk.vkFreeMemory),
            ("_kernel_buf", vk.vkDestroyBuffer),
            ("_kernel_mem", vk.vkFreeMemory),
        ):
            self._release(attr, destroy)
        # The depth image is not owned by this pass; just drop the reference.
        self._depth_image = None
        self._ready = False