Source code for robotblockset.cameras.pinhole_operations

"""Pinhole camera projection and back-projection utilities."""

from typing import Union, List
import numpy as np

from robotblockset.cameras.spatial_operations import _HomogeneousPoints
from robotblockset.rbs_typing import CameraIntrinsicsMatrixType, CameraExtrinsicMatrixType, Vector2DArrayType, Vector3DArrayType, Vector3DType, HomogeneousMatrixType, NumpyDepthMapType


[docs] def project_points_to_image_plane( positions_in_camera_frame: Union[Vector3DArrayType, Vector3DType], camera_intrinsics: CameraIntrinsicsMatrixType, ) -> Vector2DArrayType: """ Project 3D camera-frame points to image pixels. Parameters ---------- positions_in_camera_frame : Union[Vector3DArrayType, Vector3DType] 3D point positions in the camera frame. camera_intrinsics : CameraIntrinsicsMatrixType Camera intrinsics matrix with shape ``(3, 3)``. Returns ------- Vector2DArrayType Pixel coordinates on the image plane. """ homogeneous_positions_in_camera_frame = _HomogeneousPoints(positions_in_camera_frame).homogeneous_points.T homogeneous_positions_on_image_plane = camera_intrinsics @ homogeneous_positions_in_camera_frame[:3, ...] positions_on_image_plane = homogeneous_positions_on_image_plane[:2, ...] / homogeneous_positions_on_image_plane[2, ...] return positions_on_image_plane.T
[docs] def multiview_triangulation_midpoint( extrinsics_matrices: List[CameraExtrinsicMatrixType], intrinsics_matrices: List[CameraIntrinsicsMatrixType], image_coordinates: Vector2DArrayType, ) -> Vector3DType: """ Triangulate a point from multiple views using the midpoint method. Parameters ---------- extrinsics_matrices : List[CameraExtrinsicMatrixType] Extrinsics matrices for each viewpoint. intrinsics_matrices : List[CameraIntrinsicsMatrixType] Intrinsics matrices for each viewpoint. image_coordinates : Vector2DArrayType Image coordinates of the 3D point for each viewpoint. Returns ------- Vector3DType Estimated 3D position in the world frame. """ # determine the rays for each camera in the world frame rays = [] for extrinsics_matrix, intrinsics_matrix, image_point in zip(extrinsics_matrices, intrinsics_matrices, image_coordinates): ray = extrinsics_matrix[:3, :3] @ np.linalg.inv(intrinsics_matrix) @ np.array([image_point[0], image_point[1], 1]) ray = ray / np.linalg.norm(ray) rays.append(ray) lhs = 0 rhs = 0 for i, ray in enumerate(rays): rhs += (np.eye(3) - ray[:, np.newaxis] @ ray[np.newaxis, :]) @ extrinsics_matrices[i][:3, 3] lhs += np.eye(3) - ray[:, np.newaxis] @ ray[np.newaxis, :] lhs_inv = np.linalg.inv(lhs) midpoint = lhs_inv @ rhs return midpoint
[docs] def calculate_triangulation_errors( extrinsics_matrices: List[CameraExtrinsicMatrixType], intrinsics_matrices: List[CameraIntrinsicsMatrixType], image_coordinates: Vector2DArrayType, point: Vector3DType, ) -> List[float]: """ Compute per-view triangulation errors. Parameters ---------- extrinsics_matrices : List[CameraExtrinsicMatrixType] Extrinsics matrices for each viewpoint. intrinsics_matrices : List[CameraIntrinsicsMatrixType] Intrinsics matrices for each viewpoint. image_coordinates : Vector2DArrayType Image coordinates of the 3D point for each viewpoint. point : Vector3DType Estimated 3D point in the world frame. Returns ------- List[float] Euclidean distances between the point and each viewing ray. """ errors = [] for extrinsics_matrix, intrinsics_matrix, image_point in zip(extrinsics_matrices, intrinsics_matrices, image_coordinates): ray = extrinsics_matrix[:3, :3] @ np.linalg.inv(intrinsics_matrix) @ np.array([image_point[0], image_point[1], 1]) ray = ray / np.linalg.norm(ray) error = np.linalg.norm((np.eye(3) - ray[:, np.newaxis] @ ray[np.newaxis, :]) @ ((extrinsics_matrix[:3, 3]) - point)) errors.append(error.item()) return errors
"""methods for getting from 2D image coordinates to 3D world coordinates using depth information (a.k.a. unprojection)"""
[docs] def unproject_using_depthmap( image_coordinates: Vector2DArrayType, depth_map: NumpyDepthMapType, camera_intrinsics: CameraIntrinsicsMatrixType, depth_heuristic_mask_size: int = 11, depth_heuristic_percentile: float = 0.05, ) -> np.ndarray: """ Unprojects image coordinates to 3D positions using a depth map. Parameters ---------- image_coordinates : Vector2DArrayType numpy array of shape (N, 2) containing the 2D pixel coordinates of the points on the image plane depth_map : NumpyDepthMapType numpy array of shape (height, width) containing the depth values for each pixel, i.e. the z-value of the 3D point in the camera frame corresponding to the pixel camera_intrinsics : CameraIntrinsicsMatrixType camera intrinsics matrix as a numpy array of shape (3, 3) Returns ------- numpy array of shape (N, 3) containing the 3D positions of the points in the camera frame """ # TODO: should we make this extraction method more generic? though I prefer to keep it simple and not add too many options depth_values = extract_depth_from_depthmap_heuristic(image_coordinates, depth_map, depth_heuristic_mask_size, depth_heuristic_percentile) return unproject_onto_depth_values(image_coordinates, depth_values, camera_intrinsics)
[docs] def unproject_onto_depth_values( image_coordinates: Vector2DArrayType, depth_values: np.ndarray, camera_intrinsics: CameraIntrinsicsMatrixType, ) -> np.ndarray: """ Unprojects image coordinates to 3D positions using depth values for each coordinate. Parameters ---------- image_coordinates : Vector2DArrayType numpy array of shape (N, 2) containing the 2D pixel coordinates of the points on the image plane depth_values : np.ndarray numpy array of shape (N,) containing the depth values for each pixel, i.e. the z-value of the 3D point in the camera frame corresponding to the pixel camera_intrinsics : CameraIntrinsicsMatrixType camera intrinsics matrix as a numpy array of shape (3, 3) Returns ------- numpy array of shape (N, 3) containing the 3D positions of the points in the camera frame """ if image_coordinates.shape[0] != depth_values.shape[0]: raise IndexError(f"coordinates and depth values must have the same length (but they are: {image_coordinates.shape[0]} and {depth_values.shape[0]})") homogeneous_coords = np.ones((image_coordinates.shape[0], 3)) homogeneous_coords[:, :2] = image_coordinates homogeneous_coords = np.transpose(homogeneous_coords) rays_in_camera_frame = np.linalg.inv(camera_intrinsics) @ homogeneous_coords # shape is cast by numpy to column vector! z_values_in_camera_frame = depth_values t = z_values_in_camera_frame / rays_in_camera_frame[2, :] positions_in_camera_frame = t * rays_in_camera_frame homogeneous_positions_in_camera_frame = _HomogeneousPoints(positions_in_camera_frame.T).homogeneous_points.T return homogeneous_positions_in_camera_frame[:3, ...].T
[docs] def unproject_onto_world_z_plane( image_coordinates: Vector2DArrayType, camera_intrinsics: CameraIntrinsicsMatrixType, camera_in_frame_pose: HomogeneousMatrixType, height: float, ) -> Vector3DArrayType: """ Unproject image points onto a world Z plane. Parameters ---------- image_coordinates : Vector2DArrayType Pixel coordinates on the image plane. camera_intrinsics : CameraIntrinsicsMatrixType Camera intrinsics matrix with shape ``(3, 3)``. camera_in_frame_pose : HomogeneousMatrixType Camera pose in the target frame. height : float Height of the plane in the target frame. Returns ------- Vector3DArrayType 3D points on the plane ``Z = height`` in the target frame. """ # convert to homogeneous coordinates and transpose to column vectors homogeneous_coords = np.ones((image_coordinates.shape[0], 3)) homogeneous_coords[:, :2] = image_coordinates homogeneous_coords = np.transpose(homogeneous_coords) camera_frame_ray_vector = np.linalg.inv(camera_intrinsics) @ homogeneous_coords translation = camera_in_frame_pose[0:3, 3] rotation_matrix = camera_in_frame_pose[0:3, 0:3] world_frame_ray_vectors = rotation_matrix @ camera_frame_ray_vector world_frame_ray_vectors = np.transpose(world_frame_ray_vectors) t = (height - translation[2]) / world_frame_ray_vectors[:, 2] points = t[:, np.newaxis] * world_frame_ray_vectors + translation return points
[docs] def extract_depth_from_depthmap_heuristic( image_coordinates: Vector2DArrayType, depth_map: NumpyDepthMapType, mask_size: int = 11, depth_percentile: float = 0.05, ) -> np.ndarray: """ A simple heuristic to get more robust depth values of the depth map. Especially with keypoints we are often interested in points on the edge of an object, or even worse on a corner. Not only are these regions noisy by themselves but the keypoints could also be be a little off. This function takes the percentile of a region around the specified point and assumes we are interested in the nearest object present. This is not always true (think about the backside of a box looking under a 45 degree angle) but it serves as a good proxy. The more confident you are of your keypoints and the better the heatmaps are, the lower you could set the mask size and percentile. If you are very, very confident you could directly take the point cloud as well instead of manually querying the heatmap, but I find that they are more noisy. Also note that this function assumes there are no negative infinity values (no objects closer than 30cm!) Returns ------- (np.ndarray) a 1D array of the depth values for the specified coordinates """ if mask_size % 2 == 0: raise ValueError("only odd sized markers allowed") if depth_percentile >= 0.25: # TODO: The question in this error message implies that we should not raise an error, but instead log a warning. raise ValueError("For straight corners, about 75 percent of the region will be background. Are your sure you want the percentile to be lower?") # check all coordinates are within the size of the depth map to avoid unwanted wrapping of the array indices if np.max(image_coordinates[:, 1]) >= depth_map.shape[0]: raise IndexError("V coordinates out of bounds") if np.max(image_coordinates[:, 0]) >= depth_map.shape[1]: raise IndexError("U coordinates out of bounds") if np.min(image_coordinates) < 0: raise IndexError("coordinates out of bounds") # convert coordinates to integers image_coordinates = image_coordinates.astype(np.int32) # extract depth values by taking the percentile of the depth values in a region around the point mask_size_squared = mask_size**2 depth_regions = np.empty((image_coordinates.shape[0], mask_size_squared)) for i in range(image_coordinates.shape[0]): # Calculate the desired mask boundaries (using int32 to avoid overflow) v_start_desired = int(image_coordinates[i, 1]) - mask_size // 2 v_end_desired = int(image_coordinates[i, 1]) + mask_size // 2 + 1 u_start_desired = int(image_coordinates[i, 0]) - mask_size // 2 u_end_desired = int(image_coordinates[i, 0]) + mask_size // 2 + 1 # Clip the mask boundaries to the image boundaries v_start = max(0, v_start_desired) v_end = min(depth_map.shape[0], v_end_desired) u_start = max(0, u_start_desired) u_end = min(depth_map.shape[1], u_end_desired) # Extract the valid depth region depth_region = depth_map[v_start:v_end, u_start:u_end] # Flatten and pad with NaN if the masked area is smaller than needed flattened_region = depth_region.flatten() if flattened_region.size < mask_size_squared: padded_region = np.full(mask_size_squared, np.nan) padded_region[: flattened_region.size] = flattened_region depth_regions[i, :] = padded_region else: depth_regions[i, :] = flattened_region depth_values = np.nanquantile(depth_regions, depth_percentile, axis=1) return depth_values