Source code for robotblockset.cameras.image_converter

"""Image array inspection and conversion utilities."""

from __future__ import annotations

import numpy as np
from robotblockset.rbs_typing import NumpyFloatImageType, NumpyIntImageType, OpenCVIntImageType, TorchFloatImageType
from typing_extensions import TypeGuard


def is_image_array(image: object) -> TypeGuard[np.ndarray]:
    """Tell whether ``image`` has the basic shape of an image array.

    The only property shared by every supported image format is being a
    numpy array with exactly three dimensions, so that is all that is
    verified here.

    Args:
        image: Any object.

    Returns:
        True if ``image`` is a ``np.ndarray`` with ``ndim == 3``, else False.
    """
    # Guard first: non-arrays have no reliable ``ndim`` attribute.
    if not isinstance(image, np.ndarray):
        return False
    return image.ndim == 3
def is_float_image_array(image: object) -> bool:
    """Check whether an object is a valid float image array.

    The object is accepted when it

    - is a valid image array according to ``is_image_array``,
    - has dtype ``float16``, ``float32`` or ``float64``,
    - contains at least one element, and
    - has a first element inside the range ``[0.0, 1.0]``.

    Only the first element is range-checked instead of the global min/max to
    reduce the computational burden: scanning a 6M float image
    (1000 x 2000 x 3) takes a few ms.

    Args:
        image: Any object.

    Returns:
        A plain Python ``bool``; True when all checks above pass.
    """
    if not is_image_array(image):
        return False
    if image.dtype not in (np.float32, np.float64, np.float16):
        return False
    # An array without elements has no first pixel to inspect; treat it as
    # invalid rather than letting the indexing below raise IndexError.
    if image.size == 0:
        return False
    first_value = image[0, 0, 0]
    # Comparisons on numpy scalars yield np.bool_; coerce to honour the
    # annotated return type. NaN fails both comparisons and is rejected.
    return bool(0.0 <= first_value <= 1.0)
def is_int_image_array(image: object) -> bool:
    """Check whether an object is a valid int image array.

    The object is accepted when it

    - is a valid image array according to ``is_image_array``,
    - has dtype ``uint8``, ``uint16`` or ``uint32``,
    - contains at least one element, and
    - has a first element that is at most 255.

    Only the first element is range-checked instead of the global max to
    reduce the computational burden: scanning a 6M int image
    (1000 x 2000 x 3) takes a few ms.

    Args:
        image: Any object.

    Returns:
        A plain Python ``bool``; True when all checks above pass.
    """
    if not is_image_array(image):
        return False
    if image.dtype not in (np.uint8, np.uint16, np.uint32):
        return False
    # An array without elements has no first pixel to inspect; treat it as
    # invalid rather than letting the indexing below raise IndexError.
    if image.size == 0:
        return False
    # All accepted dtypes are unsigned, so only the upper bound can fail.
    # Comparisons on numpy scalars yield np.bool_; coerce to a real bool.
    return bool(image[0, 0, 0] <= 255)
class ImageConverter:
    """
    Utility class to convert between numpy arrays of different image formats.

    Only supports cpu-located images. Convert cuda images to cpu images
    (if you can afford it) or re-implement with torch.

    **Note** that these conversions may not be optimal, because we use an
    intermediate numpy float format. So, there may be a conversion from type A
    to type B that is faster if you do it directly, but we don't implement that
    here to keep implementation complexity low.
    See also https://github.com/airo-ugent/airo-mono/issues/132.
    """

    def __init__(self, image_in_numpy_float_format: NumpyFloatImageType) -> None:
        """Wrap a channel-last float image.

        The array is stored by reference (no copy is made here); use
        ``from_numpy_format`` to get a converter that owns a private copy.

        Args:
            image_in_numpy_float_format: Array accepted by
                ``is_float_image_array`` with 3 channels in the last axis.

        Raises:
            TypeError: If the array is not a valid float image array.
            IndexError: If the last dimension does not have size 3.
        """
        if not is_float_image_array(image_in_numpy_float_format):
            raise TypeError("image_in_numpy_float_format must be a valid float image array")
        if image_in_numpy_float_format.shape[2] != 3:
            raise IndexError("image_in_numpy_float_format must have 3 channels in the last dimension")
        self._image_in_numpy_float_format = image_in_numpy_float_format

    @classmethod
    def from_numpy_format(cls, image: NumpyFloatImageType) -> ImageConverter:
        """Create a converter from a channel-last float image (copied).

        Raises:
            TypeError: If ``image`` is not a valid float image array.
            IndexError: If the last dimension does not have size 3.
        """
        if not is_float_image_array(image):
            raise TypeError("image must be a valid float image array")
        if image.shape[2] != 3:
            raise IndexError("image must have 3 channels in the last dimension")
        # create copy to avoid altering the input image
        image = np.copy(image)
        return cls(image)

    @classmethod
    def from_numpy_int_format(cls, image: NumpyIntImageType) -> ImageConverter:
        """Create a converter from a channel-last int image.

        Values are cast to float32 and divided by 255.

        Raises:
            TypeError: If ``image`` is not a valid int image array.
            IndexError: If the last dimension does not have size 3.
        """
        if not is_int_image_array(image):
            raise TypeError("image must be a valid int image array")
        if image.shape[2] != 3:
            raise IndexError("image must have 3 channels in the last dimension")
        # convert to floats (creates a copy)
        image = image.astype(np.float32) / 255.0
        return cls(image)

    @classmethod
    def from_opencv_format(cls, image: OpenCVIntImageType) -> ImageConverter:
        """Create a converter from a channel-last int image in BGR order.

        Values are cast to float32 and divided by 255, then the channel axis
        is reversed (BGR -> RGB).

        Raises:
            TypeError: If ``image`` is not a valid int image array.
            IndexError: If the last dimension does not have size 3.
        """
        if not is_int_image_array(image):
            raise TypeError("image must be a valid int image array")
        if image.shape[2] != 3:
            raise IndexError("image must have 3 channels in the last dimension")
        # convert to float (creates copy); can take a few ms..
        image = image.astype(np.float32) / 255.0
        # convert BGR to RGB
        image = image[:, :, ::-1]
        return cls(image)

    @classmethod
    def from_torch_format(cls, image: TorchFloatImageType) -> ImageConverter:
        """Create a converter from a channel-first float image (copied).

        Raises:
            TypeError: If ``image`` is not a valid float image array.
            IndexError: If the first dimension does not have size 3.
        """
        if not is_float_image_array(image):
            raise TypeError("image must be a valid float image array")
        if image.shape[0] != 3:
            raise IndexError("image must have 3 channels in the first dimension")
        # create copy to avoid altering the input image
        image = np.copy(image)
        # channel first to channel last
        image = np.transpose(image, (1, 2, 0))
        return cls(image)

    @property
    def image_in_numpy_format(self) -> NumpyFloatImageType:
        """The stored channel-last float image (the same array object, not a copy)."""
        return self._image_in_numpy_float_format

    @property
    def image_in_opencv_format(self) -> OpenCVIntImageType:
        """The image as uint8 with the channel axis reversed (RGB -> BGR)."""
        reversed_channels = self._image_in_numpy_float_format[:, :, ::-1]
        # ``reversed_channels`` is a view on the stored array, so scaling must
        # NOT be done in place (``*=``): that would multiply the converter's
        # own data by 255 on every access. Can take up to a few ms.
        return (reversed_channels * 255.0).astype(np.uint8)

    @property
    def image_in_torch_format(self) -> TorchFloatImageType:
        """The stored float image with axes reordered to channel-first (a transposed view)."""
        return np.transpose(self._image_in_numpy_float_format, (2, 0, 1))

    @property
    def image_in_numpy_int_format(self) -> NumpyIntImageType:
        """The image scaled by 255 and cast to uint8, channel-last."""
        return (self._image_in_numpy_float_format * 255.0).astype(np.uint8)