# Source code for datamodules.RotNet.datasets.cropped_dataset

"""
Load a dataset of historic documents by specifying the folder where it is located.
"""

# Utils
from pathlib import Path
from typing import List, Union, Optional, Tuple

from omegaconf import ListConfig
from PIL import Image
from torch import is_tensor, Tensor
from torchvision.datasets.folder import has_file_allowed_extension, pil_loader
from torchvision.transforms import ToTensor

from src.datamodules.DivaHisDB.datasets.cropped_dataset import CroppedHisDBDataset
from src.datamodules.utils.misc import selection_validation
from src.datamodules.utils.single_transforms import RightAngleRotation
from src.utils import utils

# File extensions that count as image files when the data folders are scanned
# (passed to ``has_file_allowed_extension`` in ``get_gt_data_paths``).
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm')

# Module-level logger obtained from the project's logging helper.
log = utils.get_logger(__name__)


class CroppedRotNet(CroppedHisDBDataset):
    """
    Dataset implementation of the RotNet paper of `Gidaris et al. <https://arxiv.org/abs/1803.07728>`_. This
    dataset is used for the DivaHisDB dataset in a cropped setup.

    Every sample is a single image crop that is passed through ``RightAngleRotation``; the rotation class
    reported by that transformation is returned as the label of the sample.

    The structure of the folder should be as follows::

        data_dir
        ├── train_folder_name
        │   ├── data_folder_name
        │   │   ├── original_image_name_1
        │   │   │   ├── image_crop_1.png
        │   │   │   ├── ...
        │   │   │   └── image_crop_N.png
        │   │   └── original_image_name_N
        │   │       ├── image_crop_1.png
        │   │       ├── ...
        │   │       └── image_crop_N.png
        │   └── gt_folder_name
        │       ├── original_image_name_1
        │       │   ├── image_crop_1.png
        │       │   ├── ...
        │       │   └── image_crop_N.png
        │       └── original_image_name_N
        │           ├── image_crop_1.png
        │           ├── ...
        │           └── image_crop_N.png
        ├── validation_folder_name
        │   ├── data_folder_name
        │   │   ├── original_image_name_1
        │   │   │   ├── image_crop_1.png
        │   │   │   ├── ...
        │   │   │   └── image_crop_N.png
        │   │   └── original_image_name_N
        │   │       ├── image_crop_1.png
        │   │       ├── ...
        │   │       └── image_crop_N.png
        │   └── gt_folder_name
        │       ├── original_image_name_1
        │       │   ├── image_crop_1.png
        │       │   ├── ...
        │       │   └── image_crop_N.png
        │       └── original_image_name_N
        │           ├── image_crop_1.png
        │           ├── ...
        │           └── image_crop_N.png
        └── test_folder_name
            ├── data_folder_name
            │   ├── original_image_name_1
            │   │   ├── image_crop_1.png
            │   │   ├── ...
            │   │   └── image_crop_N.png
            │   └── original_image_name_N
            │       ├── image_crop_1.png
            │       ├── ...
            │       └── image_crop_N.png
            └── gt_folder_name
                ├── original_image_name_1
                │   ├── image_crop_1.png
                │   ├── ...
                │   └── image_crop_N.png
                └── original_image_name_N
                    ├── image_crop_1.png
                    ├── ...
                    └── image_crop_N.png

    :param path: Path to the directory that is handed to the parent dataset class
    :type path: Path
    :param data_folder_name: Name of the folder that contains the image crops (one subfolder per original image)
    :type data_folder_name: str
    :param gt_folder_name: Name of the folder that contains the ground truth crops. It is forwarded to the parent
        class; the path collection of this class (:meth:`get_gt_data_paths`) does not read it.
    :type gt_folder_name: Optional[str]
    :param selection: Restricts which original-image subfolders are used. An ``int`` keeps only the first
        ``selection`` subfolders (in sorted order), a list keeps only the subfolders whose name is in the list.
        ``None`` (or any other falsy value) uses all subfolders.
    :type selection: Optional[Union[int, List[str]]]
    :param is_test: Forwarded to the parent class. Within this class it only prevents the twin transformation
        from being applied.
    :type is_test: bool
    :param image_transform: Optional callable applied to each crop before the rotation. It is called as
        ``image_transform(img, None)`` and has to return a pair whose first element is the transformed image.
    :type image_transform: callable
    """

    def __init__(self, path: Path, data_folder_name: str, gt_folder_name: Optional[str] = None,
                 selection: Optional[Union[int, List[str]]] = None,
                 is_test: bool = False, image_transform: callable = None):
        """
        Constructor method of the class CroppedRotNet.

        All arguments are forwarded to the parent class. ``target_transform`` and ``twin_transform`` are
        always passed as ``None``.
        """
        super().__init__(path=path, data_folder_name=data_folder_name, gt_folder_name=gt_folder_name,
                         selection=selection,
                         is_test=is_test, image_transform=image_transform,
                         target_transform=None, twin_transform=None)

    def __getitem__(self, index: int) -> Tuple[Tensor, int]:
        """
        This function returns the rotated image and its rotation class for a given index.

        :param index: index of the image
        :type index: int
        :return: the rotated image and the target class reported by the rotation transformation
        :rtype: Tuple[Tensor, int]
        """
        data_img = self._load_data_and_gt(index=index)
        img, rotation_class = self._apply_transformation(data_img, index=index)
        return img, rotation_class

    def __len__(self) -> int:
        """
        This function returns the length of an epoch so the data loader knows when to stop.

        The value is read from ``self.num_samples``, which is not set in this class (presumably the parent
        class sets it -- confirm there).
        """
        return self.num_samples

    def _load_data_and_gt(self, index: int) -> Image.Image:
        """
        Loads the image for a given index. No ground truth image is loaded, because the label of a sample
        is the rotation class.

        :param index: position in ``self.img_paths_per_page`` of the image to be loaded
        :type index: int
        :return: the image
        :rtype: Image.Image
        """
        return pil_loader(self.img_paths_per_page[index])

    def _apply_transformation(self, img: Image.Image, index: int) -> Tuple[Tensor, int]:
        """
        Applies the configured transformations, converts the image to a tensor if necessary and finally
        rotates it with ``RightAngleRotation``.

        :param img: PIL image of the crop
        :type img: Image.Image
        :param index: index of the image; it is not read by this implementation
        :type index: int
        :return: the rotated image and the ``target_class`` of the rotation transformation
        :rtype: Tuple[Tensor, int]
        """
        # The constructor of this class passes twin_transform=None to the parent, so this branch only
        # runs if the attribute is set by other means.
        if self.twin_transform is not None and not self.is_test:
            img, _ = self.twin_transform(img, None)

        if self.image_transform is not None:
            # The transformation is called with (image, gt); there is no gt image here, hence None.
            img, _ = self.image_transform(img, None)

        if not is_tensor(img):
            img = ToTensor()(img)

        # A new transformation object is created per sample because its target_class is read back
        # after the call (presumably the rotation is chosen per call -- confirm in RightAngleRotation).
        rotation_transformation = RightAngleRotation()
        img = rotation_transformation(img)

        return img, rotation_transformation.target_class

    @staticmethod
    def get_gt_data_paths(directory: Path, data_folder_name: str, gt_folder_name: Optional[str] = None,
                          selection: Optional[Union[int, List[str]]] = None) \
            -> List[Path]:
        """
        Creates the list of paths to the image crops.

        Structure of the folder::

            directory
            ├── data_folder_name
            │   ├── original_image_name_1
            │   │   ├── image_crop_1.png
            │   │   ├── ...
            │   │   └── image_crop_N.png
            │   └── original_image_name_N
            │       ├── image_crop_1.png
            │       ├── ...
            │       └── image_crop_N.png
            └── gt_folder_name
                ├── original_image_name_1
                │   ├── image_crop_1.png
                │   ├── ...
                │   └── image_crop_N.png
                └── original_image_name_N
                    ├── image_crop_1.png
                    ├── ...
                    └── image_crop_N.png

        :param directory: Path to root dir of split
        :type directory: Path
        :param data_folder_name: Name of the folder containing the data
        :type data_folder_name: str
        :param gt_folder_name: Name of the folder containing the ground truth. Not read by this
            implementation, only the data folder is scanned.
        :type gt_folder_name: Optional[str]
        :param selection: Restricts which original-image subfolders are used. An ``int`` keeps only the first
            ``selection`` subfolders (in sorted order), a list keeps only the subfolders whose name is in the
            list. A falsy value (``None``, ``0``, empty list) uses all subfolders. A truthy value is passed
            through ``selection_validation`` first.
        :type selection: Optional[Union[int, List[str]]]
        :return: Sorted list of paths to the image crops of the selected subfolders
        :rtype: List[Path]
        :raises FileNotFoundError: if the data folder does not exist
        :raises NotADirectoryError: if the data folder path exists but is not a directory
        """
        directory = directory.expanduser()
        path_data_root = directory / data_folder_name

        if not path_data_root.is_dir():
            log.error("folder data or gt not found in " + str(directory))
            # Fail explicitly instead of crashing inside iterdir(); the exception types are the ones
            # iterdir() would raise for these two cases.
            if path_data_root.exists():
                raise NotADirectoryError(f"Data folder is not a directory: {path_data_root}")
            raise FileNotFoundError(f"Data folder not found: {path_data_root}")

        # get all subitems (and files) sorted
        subitems = sorted(path_data_root.iterdir())

        # check the selection parameter
        if selection:
            selection = selection_validation(subitems, selection, full_page=False)

        paths = []
        num_subdirs = 0  # Number of subdirectories seen so far, needed for an int selection

        for path_data_subdir in subitems:
            if not path_data_subdir.is_dir():
                # Crops are expected inside the per-image subfolders, so stray images are only reported.
                if has_file_allowed_extension(path_data_subdir.name, IMG_EXTENSIONS):
                    log.warning("image file found in data root: " + str(path_data_subdir))
                continue

            num_subdirs += 1

            if selection:
                if isinstance(selection, int):
                    # Subfolders are sorted, so everything after the first `selection` ones is skipped.
                    if num_subdirs > selection:
                        break
                elif isinstance(selection, (ListConfig, list)):
                    if path_data_subdir.name not in selection:
                        continue

            paths.extend(path_data_file for path_data_file in sorted(path_data_subdir.iterdir())
                         if has_file_allowed_extension(path_data_file.name, IMG_EXTENSIONS))

        return paths