"""ZED stereo RGB-D camera backend.
This module defines a ZED stereo RGB-D camera wrapper built on the ZED SDK Python bindings.
It provides stereo image acquisition, depth and point-cloud retrieval, intrinsics access for left/right views,
and runtime configuration for resolution, frame rate, and depth processing modes. The implementation exposes
ZED devices through the shared stereo RGB-D camera interface for unified downstream processing.
Key functionalities include:
- ZED SDK integration through the ``StereoRGBDCamera`` interface.
- Configurable stereo resolution, frame rate, and depth mode selection.
- Left/right RGB image retrieval with consistent image conversion utilities.
- Depth, confidence, and point-cloud data access from stereo reconstruction.
- Intrinsics retrieval for selected stereo camera views.
- Device selection by serial number and optional SVO-file input support.
Copyright (c) 2026 Jozef Stefan Institute
Authors: Leon Zlajpah.
"""
from __future__ import annotations
from typing import Any, List, Optional
import time
import cv2
import numpy as np
from robotblockset.cameras.interfaces import StereoRGBDCamera
from robotblockset.cameras.image_converter import ImageConverter
from robotblockset.rbs_typing import (
CameraIntrinsicsMatrixType,
CameraResolutionType,
HomogeneousMatrixType,
NumpyConfidenceMapType,
NumpyDepthMapType,
NumpyFloatImageType,
NumpyIntImageType,
OpenCVIntImageType,
PointCloud,
)
# from loguru import logger
from robotblockset.tools import get_logger
logger = get_logger(__name__)
try:
import pyzed.sl as sl # type: ignore
except ImportError:
raise ImportError("You should install the ZED SDK and pip install the python bindings in your environment first, see the installation README.")
# check SDK version when available; allow mocked imports during docs builds
_sdk_version = sl.Camera().get_sdk_version()
if isinstance(_sdk_version, str) and _sdk_version.split(".")[0] != "5":
raise ImportError("You should install version 5.X of the SDK!")
[docs]
class Zed(StereoRGBDCamera):
"""
Wrapper around the ZED SDK
https://www.stereolabs.com/zed-2i/
It is important to note that the ZED cameras are factory calibrated and hence provide undistorted images
and corresponding intrinsics matrices.
Also note that all depth values are relative to the left camera.
"""
# for more info on the different depth modes, see:
# https://www.stereolabs.com/docs/depth-sensing/depth-modes
# keep in mind though that the depth map is calculated during the `grab`operation, so the depth mode also influences the
# fps of the rgb images, which is why the default depth mode is None
NEURAL_DEPTH_MODE = sl.DEPTH_MODE.NEURAL
NEURAL_LIGHT_DEPTH_MODE = sl.DEPTH_MODE.NEURAL_LIGHT
NEURAL_PLUS_DEPTH_MODE = sl.DEPTH_MODE.NEURAL_PLUS
# no depth mode, higher troughput of the RGB images as the GPU has to do less work
# can also turn depth off in the runtime params, which is recommended as it allows for switching at runtime.
NONE_DEPTH_MODE = sl.DEPTH_MODE.NONE
DEPTH_MODES = (NEURAL_DEPTH_MODE, NONE_DEPTH_MODE, NEURAL_LIGHT_DEPTH_MODE, NEURAL_PLUS_DEPTH_MODE)
# for info on image resolution, pixel sizes, fov..., see:
# https://support.stereolabs.com/hc/en-us/articles/360007395634-What-is-the-camera-focal-length-and-field-of-view-
# make sure to check the combination of frame rates and resolution is available.
RESOLUTION_2K = (2208, 1242)
RESOLUTION_1080 = (1920, 1080)
RESOLUTION_720 = (1280, 720)
RESOLUTION_VGA = (672, 376)
resolution_to_identifier_dict = {
RESOLUTION_2K: sl.RESOLUTION.HD2K,
RESOLUTION_1080: sl.RESOLUTION.HD1080,
RESOLUTION_720: sl.RESOLUTION.HD720,
RESOLUTION_VGA: sl.RESOLUTION.VGA,
}
[docs]
def __init__( # type: ignore[no-any-unimported]
self,
resolution: CameraResolutionType = RESOLUTION_2K,
fps: int = 15,
depth_mode: sl.DEPTH_MODE = NONE_DEPTH_MODE,
serial_number: Optional[str] = None,
svo_filepath: Optional[str] = None,
) -> None:
"""
Initialize a ZED stereo RGB-D camera instance.
Parameters
----------
resolution : CameraResolutionType, optional
CameraResolutionType, default=RESOLUTION_2K
Requested stream resolution ``(width, height)``.
fps : int, optional
int, default=15
Target camera frame rate.
depth_mode : sl.DEPTH_MODE, optional
sl.DEPTH_MODE, default=NONE_DEPTH_MODE
Depth processing mode configured in the ZED SDK.
serial_number : str, optional
Camera serial number used to select a specific device.
svo_filepath : str, optional
Path to an SVO file used as the camera input source.
Raises
------
IndexError
If the camera cannot be opened after multiple attempts.
"""
self._resolution = resolution
self._fps = fps
self.depth_mode = depth_mode
self.serial_number = int(serial_number) if serial_number else None
self.camera = sl.Camera()
# TODO: create a configuration class for the camera parameters
self.camera_params = sl.InitParameters()
if self.serial_number:
self.camera_params.set_from_serial_number(self.serial_number)
if svo_filepath:
input_type = sl.InputType()
input_type.set_from_svo_file(svo_filepath)
self.camera_params = sl.InitParameters(input_t=input_type, svo_real_time_mode=False)
self.camera_params.camera_resolution = Zed.resolution_to_identifier_dict[resolution]
self.camera_params.camera_fps = fps
# https://www.stereolabs.com/docs/depth-sensing/depth-settings/
self.camera_params.depth_mode = depth_mode
self.camera_params.coordinate_units = sl.UNIT.METER
# objects closerby will have artifacts so they are filtered out (querying them will give a - Infinty)
self.camera_params.depth_minimum_distance = 0.3
self.camera_params.depth_maximum_distance = 10.0 # filter out far away objects
if self.camera.is_opened():
# close to open with correct params
self.camera.close()
N_OPEN_ATTEMPTS = 5
for i in range(N_OPEN_ATTEMPTS):
status = self.camera.open(self.camera_params)
if status == sl.ERROR_CODE.SUCCESS:
break
logger.info(f"Opening Zed camera failed, attempt {i + 1}/{N_OPEN_ATTEMPTS}")
if self.serial_number:
logger.info(f"Rebooting {self.serial_number}")
sl.Camera.reboot(self.serial_number)
time.sleep(2)
logger.info(f"Available ZED cameras: {sl.Camera.get_device_list()}")
self.camera = sl.Camera()
if status != sl.ERROR_CODE.SUCCESS:
logger.error("Could not open Zed camera. Sometimes, unplugging the camera and plugging it back in helps.\n" "Alternatively, try to reboot the camera by running `ZED_Explorer --reboot` in a terminal.")
raise IndexError(f"Could not open Zed camera, error = {status}")
# TODO: create a configuration class for the runtime parameters
self.runtime_params = sl.RuntimeParameters()
# Enabling fill mode changed for SDK 4.0: https://www.stereolabs.com/developers/release/4.0/migration-guide/
self.runtime_params.enable_fill_mode = False # standard > fill for accuracy. See docs.
self.runtime_params.texture_confidence_threshold = 100
self.runtime_params.confidence_threshold = 100
self.depth_enabled = True
# Enable Positional tracking (mandatory for object detection)
# positional_tracking_parameters = sl.PositionalTrackingParameters()
# If the camera is static, uncomment the following line to have better performances and boxes sticked to the ground.
# positional_tracking_parameters.set_as_static = True
# self.camera.enable_positional_tracking(positional_tracking_parameters)
# create reusable memory blocks for the measures
# these will be allocated the first time they are used
self.image_matrix = sl.Mat()
self.image_matrix_right = sl.Mat()
self.depth_image_matrix = sl.Mat()
self.depth_matrix = sl.Mat()
self.point_cloud_matrix = sl.Mat()
self.confidence_matrix = sl.Mat()
self.confidence_map = None
@property
def fps(self) -> int:
"""The frame rate of the camera, in frames per second."""
return self._fps
@property
def resolution(self) -> CameraResolutionType:
"""
Return the configured camera resolution.
Returns
-------
CameraResolutionType
Resolution ``(width, height)`` in pixels.
"""
return self._resolution
[docs]
def intrinsics_matrix(self, view: str = StereoRGBDCamera.LEFT_RGB) -> CameraIntrinsicsMatrixType:
"""
Return the intrinsics matrix for a selected stereo view.
Parameters
----------
view : str, optional
str, default=StereoRGBDCamera.LEFT_RGB
Requested view identifier.
Returns
-------
CameraIntrinsicsMatrixType
Rectified camera intrinsics matrix with shape ``(3, 3)``.
Raises
------
ValueError
If ``view`` is not a supported stereo view.
"""
# get the 'rectified' intrinsics matrices.
# https://www.stereolabs.com/docs/api/python/classpyzed_1_1sl_1_1CameraParameters.html
if view == self.LEFT_RGB:
fx = self.camera.get_camera_information().camera_configuration.calibration_parameters.left_cam.fx
fy = self.camera.get_camera_information().camera_configuration.calibration_parameters.left_cam.fy
cx = self.camera.get_camera_information().camera_configuration.calibration_parameters.left_cam.cx
cy = self.camera.get_camera_information().camera_configuration.calibration_parameters.left_cam.cy
elif view == self.RIGHT_RGB:
fx = self.camera.get_camera_information().camera_configuration.calibration_parameters.right_cam.fx
fy = self.camera.get_camera_information().camera_configuration.calibration_parameters.right_cam.fy
cx = self.camera.get_camera_information().camera_configuration.calibration_parameters.right_cam.cx
cy = self.camera.get_camera_information().camera_configuration.calibration_parameters.right_cam.cy
else:
raise ValueError(f"view must be one of {self._VIEWS}")
cam_matrix = np.zeros((3, 3))
cam_matrix[0, 0] = fx
cam_matrix[1, 1] = fy
cam_matrix[2, 2] = 1
cam_matrix[0, 2] = cx
cam_matrix[1, 2] = cy
return cam_matrix
@property
def pose_of_right_view_in_left_view(self) -> HomogeneousMatrixType:
"""
Return the right-camera pose expressed in the left-camera frame.
Returns
-------
HomogeneousMatrixType
Homogeneous transform from left view to right view.
"""
# get the 'rectified' pose of the right view wrt to the left view
# should be approx a translation along the x-axis of 120mm (Zed2i camera), expressed in the unit of the coordinates, which we set to meters.
# https://www.stereolabs.com/docs/api/python/classpyzed_1_1sl_1_1CalibrationParameters.html
# Note: the CalibrationParameters class changed for the SDK 4.0, the old .T attribute we used was removed.
return self.camera.get_camera_information().camera_configuration.calibration_parameters.stereo_transform.m
@property
def depth_enabled(self) -> bool:
"""Enable or disable depth processing.
Runtime parameter to enable/disable the depth & point_cloud computation. This speeds up the RGB image capture."""
return self.runtime_params.enable_depth
@depth_enabled.setter
def depth_enabled(self, value: bool) -> None:
"""
Enable or disable depth and point-cloud computation at runtime.
Parameters
----------
value : bool
Whether depth computation should be enabled.
Returns
-------
None
"""
self.runtime_params.enable_depth = value
def _grab_images(self) -> None:
"""Grab and process the latest ZED images.
grabs (and waits for) the latest image(s) from the camera, rectifies them and computes the depth information (based on the depth mode setting)"""
# this is a blocking call
# https://www.stereolabs.com/docs/api/python/classpyzed_1_1sl_1_1Camera.html#a2338c15f49b5f132df373a06bd281822
# we might want to consider running this in a seperate thread and using a queue to store the images?
error_code = self.camera.grab(self.runtime_params)
if error_code != sl.ERROR_CODE.SUCCESS:
raise IndexError("Could not grab new camera frame")
def _retrieve_rgb_image(self, view: str = StereoRGBDCamera.LEFT_RGB) -> NumpyFloatImageType:
"""
Retrieve the latest RGB frame as normalized floating-point image.
Parameters
----------
view : str, optional
str, default=StereoRGBDCamera.LEFT_RGB
Requested stereo view.
Returns
-------
NumpyFloatImageType
RGB image in float format.
"""
image = self._retrieve_rgb_image_as_int(view)
# convert from int to float image
# this can take up ~ ms for larger images (can impact FPS)
image = ImageConverter.from_numpy_int_format(image).image_in_numpy_format
return image
def _retrieve_rgb_image_as_int(self, view: str = StereoRGBDCamera.LEFT_RGB) -> NumpyIntImageType:
"""
Retrieve the latest RGB frame as integer image.
Parameters
----------
view : str, optional
str, default=StereoRGBDCamera.LEFT_RGB
Requested stereo view.
Returns
-------
NumpyIntImageType
RGB image in integer format.
Raises
------
ValueError
If ``view`` is not a supported stereo view.
"""
if view not in StereoRGBDCamera._VIEWS:
raise ValueError(f"view must be one of {self._VIEWS}, but was {view}")
image_bgra: OpenCVIntImageType
if view == StereoRGBDCamera.RIGHT_RGB:
self.camera.retrieve_image(self.image_matrix_right, sl.VIEW.RIGHT)
image_bgra = self.image_matrix_right.get_data()
else:
self.camera.retrieve_image(self.image_matrix, sl.VIEW.LEFT)
image_bgra = self.image_matrix.get_data()
image = cv2.cvtColor(image_bgra, cv2.COLOR_BGRA2RGB)
return image
def _retrieve_depth_map(self) -> NumpyDepthMapType:
"""
Retrieve the latest depth map in metric units.
Returns
-------
NumpyDepthMapType
Depth map relative to the left camera.
Raises
------
RuntimeError
If depth mode is disabled or depth retrieval is turned off.
"""
if self.depth_mode == self.NONE_DEPTH_MODE:
raise RuntimeError("Cannot retrieve depth data if depth mode is NONE")
if not self.depth_enabled:
raise RuntimeError("Cannot retrieve depth data if depth is disabled")
self.camera.retrieve_measure(self.depth_matrix, sl.MEASURE.DEPTH)
depth_map = self.depth_matrix.get_data()
return depth_map
def _retrieve_depth_image(self) -> NumpyIntImageType:
"""
Retrieve the latest colorized depth image.
Returns
-------
NumpyIntImageType
Colorized depth image.
Raises
------
RuntimeError
If depth mode is disabled or depth retrieval is turned off.
"""
if self.depth_mode == self.NONE_DEPTH_MODE:
raise RuntimeError("Cannot retrieve depth data if depth mode is NONE")
if not self.depth_enabled:
raise RuntimeError("Cannot retrieve depth data if depth is disabled")
self.camera.retrieve_image(self.depth_image_matrix, sl.VIEW.DEPTH)
image_bgra = self.depth_image_matrix.get_data()
# image = image[..., :3]
image = cv2.cvtColor(image_bgra, cv2.COLOR_BGRA2RGB)
return image
def _retrieve_colored_point_cloud(self) -> PointCloud:
"""
Retrieve the latest colored point cloud.
Returns
-------
PointCloud
Point cloud with XYZ positions and RGB colors.
Raises
------
RuntimeError
If depth mode is disabled or depth retrieval is turned off.
"""
if self.depth_mode == self.NONE_DEPTH_MODE:
raise RuntimeError("Cannot retrieve depth data if depth mode is NONE")
if not self.depth_enabled:
raise RuntimeError("Cannot retrieve depth data if depth is disabled")
self.camera.retrieve_measure(self.point_cloud_matrix, sl.MEASURE.XYZ)
# shape (width, height, 4) with the 4th dim being x,y,z,(rgba packed into float)
# can be nan,nan,nan, nan (no point in the point_cloud on this pixel)
# or x,y,z, nan (no color information on this pixel??)
# or x,y,z, value (color information on this pixel)
point_cloud_XYZ_ = self.point_cloud_matrix.get_data()
positions = cv2.cvtColor(point_cloud_XYZ_, cv2.COLOR_BGRA2BGR).reshape(-1, 3)
colors = self._retrieve_rgb_image_as_int().reshape(-1, 3)
return PointCloud(positions, colors)
def _retrieve_confidence_map(self) -> NumpyConfidenceMapType:
"""
Retrieve the latest normalized depth confidence map.
Returns
-------
NumpyConfidenceMapType
Confidence map in ``[0, 1]``, where higher is better.
"""
self.camera.retrieve_measure(self.confidence_matrix, sl.MEASURE.CONFIDENCE)
zed_confidence_map = self.confidence_matrix.get_data() # single channel float32 image
# The ZED confidence map is in the range [0, 100], where 0 is the highest confidence and 100 the lowest.
# See: https://www.stereolabs.com/docs/depth-sensing/depth-settings#depth-confidence-filtering
# We convert it to a more standard range [0, 1], where 0 is the lowest confidence and 1 the highest.
confidence_map = 1 - zed_confidence_map / 100.0
return confidence_map
[docs]
def get_colored_point_cloud(self) -> PointCloud:
"""
Grab a new frame and return the corresponding colored point cloud.
Returns
-------
PointCloud
Newly acquired colored point cloud.
Raises
------
RuntimeError
If depth mode is disabled or depth retrieval is turned off.
"""
if self.depth_mode == self.NONE_DEPTH_MODE:
raise RuntimeError("Cannot retrieve depth data if depth mode is NONE")
if not self.depth_enabled:
raise RuntimeError("Cannot retrieve depth data if depth is disabled")
self._grab_images()
return self._retrieve_colored_point_cloud()
[docs]
@staticmethod
def list_camera_serial_numbers() -> List[str]:
"""
List all connected ZED cameras
can be used to select a device ID or to check if cameras are connected.
"""
device_list = sl.Camera.get_device_list()
return device_list
# manage resources
# this is important if you want to reuse the camera
# multiple times within a python script, in which case you should release the camera before creating a new object.
# cf. https://stackoverflow.com/questions/865115/how-do-i-correctly-clean-up-a-python-object
def __enter__(self) -> StereoRGBDCamera:
"""
Enter context manager.
Returns
-------
StereoRGBDCamera
The current camera instance.
"""
return self
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
"""
Exit context manager and close the camera device.
Parameters
----------
exc_type : Any
Exception type raised inside the context, if any.
exc_value : Any
Exception instance raised inside the context, if any.
traceback : Any
Exception traceback, if any.
Returns
-------
None
"""
self.camera.close()
def _test_zed_implementation() -> None:
"""this script serves as a 'test' for the zed implementation."""
from robotblockset.cameras.manual_test_hw import manual_test_stereo_rgbd_camera, profile_rgb_throughput
# zed specific tests:
# - list all serial numbers of the cameras
serial_numbers = Zed.list_camera_serial_numbers()
print(serial_numbers)
input("each camera connected to the pc should be listed, press enter to continue")
# test rgbd stereo camera
with Zed(Zed.RESOLUTION_2K, fps=15, depth_mode=Zed.NEURAL_LIGHT_DEPTH_MODE) as zed:
print(zed.get_colored_point_cloud().points) # TODO: test the point_cloud more explicity?
manual_test_stereo_rgbd_camera(zed)
# profile rgb throughput, should be at 60FPS, i.e. 0.017s
zed = Zed(Zed.RESOLUTION_720, fps=60)
profile_rgb_throughput(zed)
if __name__ == "__main__":
_test_zed_implementation()