"""Pinhole camera projection and back-projection utilities."""
from typing import Union, List
import numpy as np
from robotblockset.cameras.spatial_operations import _HomogeneousPoints
from robotblockset.rbs_typing import CameraIntrinsicsMatrixType, CameraExtrinsicMatrixType, Vector2DArrayType, Vector3DArrayType, Vector3DType, HomogeneousMatrixType, NumpyDepthMapType
def project_points_to_image_plane(
    positions_in_camera_frame: Union[Vector3DArrayType, Vector3DType],
    camera_intrinsics: CameraIntrinsicsMatrixType,
) -> Vector2DArrayType:
    """
    Project 3D camera-frame points to image pixels with the pinhole model.

    Parameters
    ----------
    positions_in_camera_frame : Union[Vector3DArrayType, Vector3DType]
        3D point positions expressed in the camera frame.
    camera_intrinsics : CameraIntrinsicsMatrixType
        Camera intrinsics matrix with shape ``(3, 3)``.

    Returns
    -------
    Vector2DArrayType
        Pixel coordinates on the image plane, one row per input point.
    """
    # Normalize the input (single point or array) into homogeneous column vectors (4, N).
    points_homogeneous = _HomogeneousPoints(positions_in_camera_frame).homogeneous_points.T
    # Apply the intrinsics to the Cartesian part: (3, 3) @ (3, N) -> (3, N).
    pixels_homogeneous = camera_intrinsics @ points_homogeneous[:3, ...]
    # Perspective division by the depth row yields pixel coordinates.
    pixels = pixels_homogeneous[:2, ...] / pixels_homogeneous[2, ...]
    return pixels.T
[docs]
def multiview_triangulation_midpoint(
extrinsics_matrices: List[CameraExtrinsicMatrixType],
intrinsics_matrices: List[CameraIntrinsicsMatrixType],
image_coordinates: Vector2DArrayType,
) -> Vector3DType:
"""
Triangulate a point from multiple views using the midpoint method.
Parameters
----------
extrinsics_matrices : List[CameraExtrinsicMatrixType]
Extrinsics matrices for each viewpoint.
intrinsics_matrices : List[CameraIntrinsicsMatrixType]
Intrinsics matrices for each viewpoint.
image_coordinates : Vector2DArrayType
Image coordinates of the 3D point for each viewpoint.
Returns
-------
Vector3DType
Estimated 3D position in the world frame.
"""
# determine the rays for each camera in the world frame
rays = []
for extrinsics_matrix, intrinsics_matrix, image_point in zip(extrinsics_matrices, intrinsics_matrices, image_coordinates):
ray = extrinsics_matrix[:3, :3] @ np.linalg.inv(intrinsics_matrix) @ np.array([image_point[0], image_point[1], 1])
ray = ray / np.linalg.norm(ray)
rays.append(ray)
lhs = 0
rhs = 0
for i, ray in enumerate(rays):
rhs += (np.eye(3) - ray[:, np.newaxis] @ ray[np.newaxis, :]) @ extrinsics_matrices[i][:3, 3]
lhs += np.eye(3) - ray[:, np.newaxis] @ ray[np.newaxis, :]
lhs_inv = np.linalg.inv(lhs)
midpoint = lhs_inv @ rhs
return midpoint
[docs]
def calculate_triangulation_errors(
extrinsics_matrices: List[CameraExtrinsicMatrixType],
intrinsics_matrices: List[CameraIntrinsicsMatrixType],
image_coordinates: Vector2DArrayType,
point: Vector3DType,
) -> List[float]:
"""
Compute per-view triangulation errors.
Parameters
----------
extrinsics_matrices : List[CameraExtrinsicMatrixType]
Extrinsics matrices for each viewpoint.
intrinsics_matrices : List[CameraIntrinsicsMatrixType]
Intrinsics matrices for each viewpoint.
image_coordinates : Vector2DArrayType
Image coordinates of the 3D point for each viewpoint.
point : Vector3DType
Estimated 3D point in the world frame.
Returns
-------
List[float]
Euclidean distances between the point and each viewing ray.
"""
errors = []
for extrinsics_matrix, intrinsics_matrix, image_point in zip(extrinsics_matrices, intrinsics_matrices, image_coordinates):
ray = extrinsics_matrix[:3, :3] @ np.linalg.inv(intrinsics_matrix) @ np.array([image_point[0], image_point[1], 1])
ray = ray / np.linalg.norm(ray)
error = np.linalg.norm((np.eye(3) - ray[:, np.newaxis] @ ray[np.newaxis, :]) @ ((extrinsics_matrix[:3, 3]) - point))
errors.append(error.item())
return errors
"""methods for getting from 2D image coordinates to 3D world coordinates using depth information (a.k.a. unprojection)"""
def unproject_using_depthmap(
    image_coordinates: Vector2DArrayType,
    depth_map: NumpyDepthMapType,
    camera_intrinsics: CameraIntrinsicsMatrixType,
    depth_heuristic_mask_size: int = 11,
    depth_heuristic_percentile: float = 0.05,
) -> np.ndarray:
    """
    Unproject image coordinates to 3D camera-frame positions using a depth map.

    Parameters
    ----------
    image_coordinates : Vector2DArrayType
        numpy array of shape (N, 2) containing the 2D pixel coordinates of the points on the image plane
    depth_map : NumpyDepthMapType
        numpy array of shape (height, width) containing the depth values for each pixel, i.e. the z-value of the 3D point in the camera frame corresponding to the pixel
    camera_intrinsics : CameraIntrinsicsMatrixType
        camera intrinsics matrix as a numpy array of shape (3, 3)
    depth_heuristic_mask_size : int
        neighborhood size handed to the depth-extraction heuristic
    depth_heuristic_percentile : float
        percentile handed to the depth-extraction heuristic

    Returns
    -------
    numpy array of shape (N, 3) containing the 3D positions of the points in the camera frame
    """
    # TODO: should we make this extraction method more generic? though I prefer to keep it simple and not add too many options
    sampled_depths = extract_depth_from_depthmap_heuristic(
        image_coordinates, depth_map, depth_heuristic_mask_size, depth_heuristic_percentile
    )
    return unproject_onto_depth_values(image_coordinates, sampled_depths, camera_intrinsics)
[docs]
def unproject_onto_depth_values(
image_coordinates: Vector2DArrayType,
depth_values: np.ndarray,
camera_intrinsics: CameraIntrinsicsMatrixType,
) -> np.ndarray:
"""
Unprojects image coordinates to 3D positions using depth values for each coordinate.
Parameters
----------
image_coordinates : Vector2DArrayType
numpy array of shape (N, 2) containing the 2D pixel coordinates of the points on the image plane
depth_values : np.ndarray
numpy array of shape (N,) containing the depth values for each pixel, i.e. the z-value of the 3D point in the camera frame corresponding to the pixel
camera_intrinsics : CameraIntrinsicsMatrixType
camera intrinsics matrix as a numpy array of shape (3, 3)
Returns
-------
numpy array of shape (N, 3) containing the 3D positions of the points in the camera frame
"""
if image_coordinates.shape[0] != depth_values.shape[0]:
raise IndexError(f"coordinates and depth values must have the same length (but they are: {image_coordinates.shape[0]} and {depth_values.shape[0]})")
homogeneous_coords = np.ones((image_coordinates.shape[0], 3))
homogeneous_coords[:, :2] = image_coordinates
homogeneous_coords = np.transpose(homogeneous_coords)
rays_in_camera_frame = np.linalg.inv(camera_intrinsics) @ homogeneous_coords # shape is cast by numpy to column vector!
z_values_in_camera_frame = depth_values
t = z_values_in_camera_frame / rays_in_camera_frame[2, :]
positions_in_camera_frame = t * rays_in_camera_frame
homogeneous_positions_in_camera_frame = _HomogeneousPoints(positions_in_camera_frame.T).homogeneous_points.T
return homogeneous_positions_in_camera_frame[:3, ...].T
[docs]
def unproject_onto_world_z_plane(
image_coordinates: Vector2DArrayType,
camera_intrinsics: CameraIntrinsicsMatrixType,
camera_in_frame_pose: HomogeneousMatrixType,
height: float,
) -> Vector3DArrayType:
"""
Unproject image points onto a world Z plane.
Parameters
----------
image_coordinates : Vector2DArrayType
Pixel coordinates on the image plane.
camera_intrinsics : CameraIntrinsicsMatrixType
Camera intrinsics matrix with shape ``(3, 3)``.
camera_in_frame_pose : HomogeneousMatrixType
Camera pose in the target frame.
height : float
Height of the plane in the target frame.
Returns
-------
Vector3DArrayType
3D points on the plane ``Z = height`` in the target frame.
"""
# convert to homogeneous coordinates and transpose to column vectors
homogeneous_coords = np.ones((image_coordinates.shape[0], 3))
homogeneous_coords[:, :2] = image_coordinates
homogeneous_coords = np.transpose(homogeneous_coords)
camera_frame_ray_vector = np.linalg.inv(camera_intrinsics) @ homogeneous_coords
translation = camera_in_frame_pose[0:3, 3]
rotation_matrix = camera_in_frame_pose[0:3, 0:3]
world_frame_ray_vectors = rotation_matrix @ camera_frame_ray_vector
world_frame_ray_vectors = np.transpose(world_frame_ray_vectors)
t = (height - translation[2]) / world_frame_ray_vectors[:, 2]
points = t[:, np.newaxis] * world_frame_ray_vectors + translation
return points