Source code for datamodules.RotNet.datasets.cropped_dataset

"""
Load a dataset of historic documents by specifying the folder where it is located.
"""

# Utils
from pathlib import Path
from typing import List, Union, Optional, Tuple

from omegaconf import ListConfig
from PIL import Image
from torch import is_tensor, Tensor
from torchvision.datasets.folder import has_file_allowed_extension, pil_loader
from torchvision.transforms import ToTensor

from src.datamodules.DivaHisDB.datasets.cropped_dataset import CroppedHisDBDataset
from src.datamodules.utils.misc import selection_validation
from src.datamodules.utils.single_transforms import RightAngleRotation
from src.utils import utils

# File name extensions that are accepted as images when the crop paths are
# collected in ``CroppedRotNet.get_gt_data_paths``.
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm')

# Module-level logger, obtained from the project's logging helper.
log = utils.get_logger(__name__)


class CroppedRotNet(CroppedHisDBDataset):
    """
    Dataset implementation of the RotNet paper of `Gidaris et al. <https://arxiv.org/abs/1803.07728>`_.
    This dataset is used for the DivaHisDB dataset in a cropped setup.

    Every sample is an image crop that gets rotated by ``RightAngleRotation``; the class of the
    applied rotation is returned as the target, so no ground truth images are read by this class.

    The structure of the folder should be as follows::

        data_dir
        ├── train_folder_name
        │   ├── data_folder_name
        │   │   ├── original_image_name_1
        │   │   │   ├── image_crop_1.png
        │   │   │   ├── ...
        │   │   │   └── image_crop_N.png
        │   │   └── original_image_name_N
        │   │       ├── image_crop_1.png
        │   │       ├── ...
        │   │       └── image_crop_N.png
        │   └── gt_folder_name
        │       ├── original_image_name_1
        │       │   ├── image_crop_1.png
        │       │   ├── ...
        │       │   └── image_crop_N.png
        │       └── original_image_name_N
        │           ├── image_crop_1.png
        │           ├── ...
        │           └── image_crop_N.png
        ├── validation_folder_name
        │   ├── data_folder_name
        │   │   ├── original_image_name_1
        │   │   │   ├── image_crop_1.png
        │   │   │   ├── ...
        │   │   │   └── image_crop_N.png
        │   │   └── original_image_name_N
        │   │       ├── image_crop_1.png
        │   │       ├── ...
        │   │       └── image_crop_N.png
        │   └── gt_folder_name
        │       ├── original_image_name_1
        │       │   ├── image_crop_1.png
        │       │   ├── ...
        │       │   └── image_crop_N.png
        │       └── original_image_name_N
        │           ├── image_crop_1.png
        │           ├── ...
        │           └── image_crop_N.png
        └── test_folder_name
            ├── data_folder_name
            │   ├── original_image_name_1
            │   │   ├── image_crop_1.png
            │   │   ├── ...
            │   │   └── image_crop_N.png
            │   └── original_image_name_N
            │       ├── image_crop_1.png
            │       ├── ...
            │       └── image_crop_N.png
            └── gt_folder_name
                ├── original_image_name_1
                │   ├── image_crop_1.png
                │   ├── ...
                │   └── image_crop_N.png
                └── original_image_name_N
                    ├── image_crop_1.png
                    ├── ...
                    └── image_crop_N.png

    :param path: Path to root dir of the dataset (folder containing the train/val/test folder)
    :type path: Path
    :param data_folder_name: Name of the folder (inside a train/val/test folder) that contains the image crops
    :type data_folder_name: str
    :param gt_folder_name: Name of the folder (inside a train/val/test folder) that contains the ground truth.
        It is only forwarded to the parent class; the methods of this class do not read it.
    :type gt_folder_name: Optional[str]
    :param selection: If you only want to use a subset of the dataset, pass either an int (number of
        original-image folders to use, in sorted order) or a list with the names of the original-image folders.
        If you want to use all files, set this parameter to None.
    :type selection: Optional[Union[int, List[str]]]
    :param is_test: Forwarded to the parent class to mark the test split.
    :type is_test: bool
    :param image_transform: Transformation applied to each image. It is called as
        ``image_transform(img, None)`` and has to return a pair whose first element is the transformed image.
    :type image_transform: callable
    """

    def __init__(self, path: Path, data_folder_name: str, gt_folder_name: Optional[str] = None,
                 selection: Optional[Union[int, List[str]]] = None,
                 is_test: bool = False, image_transform: callable = None):
        """
        Constructor method of the class CroppedRotNet.

        All arguments are forwarded to the parent class; ``target_transform`` and ``twin_transform``
        are always passed as None because the target is the rotation class and not a ground truth image.
        """
        super().__init__(path=path, data_folder_name=data_folder_name, gt_folder_name=gt_folder_name,
                         selection=selection,
                         is_test=is_test, image_transform=image_transform,
                         target_transform=None, twin_transform=None)

    def __getitem__(self, index: int) -> Tuple[Tensor, int]:
        """
        This function returns the rotated image and its rotation class for a given index.

        :param index: index of the image
        :type index: int
        :return: the rotated image and the target class of the applied rotation
        :rtype: Tuple[Tensor, int]
        """
        data_img = self._load_data_and_gt(index=index)
        img, gt = self._apply_transformation(data_img, index=index)
        return img, gt

    def __len__(self):
        """
        This function returns the length of an epoch so the data loader knows when to stop.

        :return: the value of ``self.num_samples`` (NOTE(review): presumably set by the parent class - confirm)
        """
        return self.num_samples

    def _load_data_and_gt(self, index: int) -> Image.Image:
        """
        Loads the image for a given index. Despite the name, no ground truth image is loaded.

        :param index: index of the image to be loaded, used to look up the path in ``self.img_paths_per_page``
        :type index: int
        :return: the image
        :rtype: Image.Image
        """
        data_img = pil_loader(self.img_paths_per_page[index])
        return data_img

    def _apply_transformation(self, img: Image.Image, index: int) -> Tuple[Tensor, int]:
        """
        Applies the configured transformations, converts the result to a tensor (if it is not
        one already) and rotates it with a freshly created ``RightAngleRotation``.

        :param img: PIL image of the codex
        :type img: Image.Image
        :param index: index of the image; currently not used by this method, the rotation is
            chosen by ``RightAngleRotation`` itself
        :type index: int
        :return: the rotated image tensor and the target class of the applied rotation
        :rtype: Tuple[Tensor, int]
        """
        if self.twin_transform is not None and not self.is_test:
            img, _ = self.twin_transform(img, None)

        if self.image_transform is not None:
            # the transform follows the (image, gt) calling convention; there is no gt here
            img, _ = self.image_transform(img, None)

        # the rotation is applied on a tensor, so make sure we have one
        if not is_tensor(img):
            img = ToTensor()(img)

        # a new transformation object per sample, its target_class is the label of this sample
        rotation_transformation = RightAngleRotation()
        img = rotation_transformation(img)

        return img, rotation_transformation.target_class

    @staticmethod
    def get_gt_data_paths(directory: Path, data_folder_name: str, gt_folder_name: Optional[str] = None,
                          selection: Optional[Union[int, List[str]]] = None) \
            -> List[Path]:
        """
        Creates the list of paths to the image crops.

        Structure of the folder::

            directory
            ├── data_folder_name
            │   ├── original_image_name_1
            │   │   ├── image_crop_1.png
            │   │   ├── ...
            │   │   └── image_crop_N.png
            │   └── original_image_name_N
            │       ├── image_crop_1.png
            │       ├── ...
            │       └── image_crop_N.png
            └── gt_folder_name
                ├── original_image_name_1
                │   ├── image_crop_1.png
                │   ├── ...
                │   └── image_crop_N.png
                └── original_image_name_N
                    ├── image_crop_1.png
                    ├── ...
                    └── image_crop_N.png

        :param directory: Path to root dir of split
        :type directory: Path
        :param data_folder_name: Name of the folder containing the data
        :type data_folder_name: str
        :param gt_folder_name: Name of the folder containing the ground truth. Not used by this
            method, only the data folder is scanned.
        :type gt_folder_name: Optional[str]
        :param selection: If you only want to use a subset of the dataset, pass either an int (only the
            first ``selection`` sub folders, in sorted order, are used) or a list with the names of the
            sub folders to use. If you want to use all files, set this parameter to None.
        :type selection: Optional[Union[int, List[str]]]
        :raises FileNotFoundError: if the data folder does not exist
        :raises NotADirectoryError: if the data folder path exists but is not a directory
        :return: List of paths to the image crops, sorted per sub folder
        :rtype: List[Path]
        """
        directory = directory.expanduser()
        path_data_root = directory / data_folder_name

        if not path_data_root.is_dir():
            log.error("folder data or gt not found in " + str(directory))
            # Fail right here with a clear message. Listing the folder below would raise the
            # same exception types anyway, just with a less helpful traceback.
            error_type = NotADirectoryError if path_data_root.exists() else FileNotFoundError
            raise error_type("data folder is not a directory: " + str(path_data_root))

        # get all subitems (and files) sorted
        subitems = sorted(path_data_root.iterdir())

        # check the selection parameter
        if selection:
            selection = selection_validation(subitems, selection, full_page=False)

        paths = []
        counter = 0  # Counter for subdirectories, needed for selection parameter

        for path_data_subdir in subitems:
            if not path_data_subdir.is_dir():
                # only sub folders are expected in the data root, stray images are reported and skipped
                if has_file_allowed_extension(path_data_subdir.name, IMG_EXTENSIONS):
                    log.warning("image file found in data root: " + str(path_data_subdir))
                continue

            counter += 1

            if selection:
                if isinstance(selection, int):
                    # subitems are sorted, so everything after the first `selection` folders is dropped
                    if counter > selection:
                        break
                elif isinstance(selection, (ListConfig, list)):
                    if path_data_subdir.name not in selection:
                        continue

            paths.extend(path_data_file for path_data_file in sorted(path_data_subdir.iterdir())
                         if has_file_allowed_extension(path_data_file.name, IMG_EXTENSIONS))

        return paths