"""Load a dataset of historic documents by specifying the folder where it is located."""

# Utils
import re
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Callable, List, Optional, Tuple, Union

import torch.utils.data as data
from PIL import Image
from torch import Tensor, is_tensor
from torchvision.datasets.folder import pil_loader
from torchvision.transforms import ToTensor

from src.datamodules.utils.misc import ImageDimensions, get_output_file_list
from src.utils import utils

# File extensions (lower-case, with leading dot) of the supported image formats.
# NOTE(review): not referenced by any code visible in this file -- presumably
# kept for modules that import it; confirm before removing.
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.gif')

# Module-level logger, obtained from the project's logging helper.
log = utils.get_logger(__name__)

@dataclass
class DatasetSpecs:
    """
    This class is used to specify the location of the data and ground truth files.

    It can also be used to specify a range of files that should be used. This is useful if you want
    to split the data into train/val/test and want to use the same data root for all three splits.

    The field names and their order must match the parameters of
    ``DatasetRolfFormat._get_paths_from_specs``, because the specs are forwarded to it as keyword
    arguments via ``dataclasses.asdict``.

    :param data_root: The root folder of the dataset.
    :type data_root: str
    :param doc_dir: The image directory, relative to ``data_root``.
    :type doc_dir: str
    :param doc_names: The file name pattern of the images. It must contain exactly one run of ``#``
        characters, which is replaced by the zero-padded file number (e.g. ``img_####.png``).
    :type doc_names: str
    :param gt_dir: The ground truth directory, relative to ``data_root``.
    :type gt_dir: str
    :param gt_names: The file name pattern of the ground truth files (same placeholder rules as ``doc_names``).
    :type gt_names: str
    :param range_from: The first file number of the range (inclusive).
    :type range_from: int
    :param range_to: The last file number of the range (inclusive).
    :type range_to: int
    """
    data_root: str
    doc_dir: str
    doc_names: str
    gt_dir: str
    gt_names: str
    range_from: int
    range_to: int
class DatasetRolfFormat(data.Dataset):
    """
    Dataset that loads the data in the Rolf format.

    Image and ground truth files are addressed through a file name pattern that contains exactly one
    run of ``#`` characters (e.g. ``name_####.jpg``). The run is replaced by the file number,
    zero-padded to the length of the run. The different splits are defined by giving a range and a
    root folder for each split.

    :param dataset_specs: The dataset specs that specify the location of the data and ground truth files.
    :type dataset_specs: List[DatasetSpecs]
    :param image_dims: The dimensions every image and ground truth file must have.
    :type image_dims: ImageDimensions
    :param is_test: Is it the test dataset?
    :type is_test: bool
    :param image_transform: Transformation called as ``image_transform(img, gt)``; must return the
        ``(img, gt)`` pair.
    :type image_transform: callable
    :param target_transform: Transformation called as ``target_transform(img, gt)`` after both have
        been converted to tensors; must return the ``(img, gt)`` pair.
    :type target_transform: callable
    :param twin_transform: Transformation called as ``twin_transform(img, gt)``; must return the
        ``(img, gt)`` pair. It is skipped for the test dataset.
    :type twin_transform: callable
    """

    def __init__(self, dataset_specs: List[DatasetSpecs], image_dims: ImageDimensions,
                 is_test: bool = False, image_transform: Optional[Callable] = None,
                 target_transform: Optional[Callable] = None, twin_transform: Optional[Callable] = None):
        """
        Constructor method for the DatasetRolfFormat class.

        :raises AssertionError: If no image/ground truth pair could be found.
        """
        super().__init__()

        self.dataset_specs = dataset_specs
        self.image_dims = image_dims

        # transformations
        self.image_transform = image_transform
        self.target_transform = target_transform
        self.twin_transform = twin_transform

        self.is_test = is_test

        # List of tuples that contain the path to the image and gt that belong together
        self.img_gt_path_list = self.get_img_gt_path_list(list_specs=self.dataset_specs)

        if is_test:
            # Only the test dataset keeps track of the image paths and the derived output files
            self.image_path_list = [img_gt_path[0] for img_gt_path in self.img_gt_path_list]
            self.output_file_list = get_output_file_list(image_path_list=self.image_path_list)

        self.num_samples = len(self.img_gt_path_list)

        assert self.num_samples > 0, 'The dataset does not contain any image/ground truth pair'

    def __len__(self) -> int:
        """
        This function returns the length of an epoch so the data loader knows when to stop.

        The length is the number of image/ground truth pairs, for train/val as well as for test.

        :return: The number of samples in the dataset.
        :rtype: int
        """
        return self.num_samples

    def __getitem__(self, index: int) -> Union[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, int]]:
        """
        This function returns the image and the ground truth for a given index.

        If it is the test dataset, the index is returned as an additional third element.

        :param index: The index of the sample that should be returned.
        :type index: int
        :return: ``(img, gt)`` for train/val, ``(img, gt, index)`` for test.
        :rtype: tuple
        """
        if self.is_test:
            return self._get_test_items(index=index)
        return self._get_train_val_items(index=index)

    def _get_train_val_items(self, index: int) -> Tuple[Tensor, Tensor]:
        """
        This function returns the transformed image and ground truth for a given index.

        :param index: The index of the sample that should be returned.
        :type index: int
        :return: The image and the ground truth for the given index.
        :rtype: tuple
        """
        data_img, gt_img = self._load_data_and_gt(index=index)
        img, gt = self._apply_transformation(data_img, gt_img)
        return img, gt

    def _get_test_items(self, index: int) -> Tuple[Tensor, Tensor, int]:
        """
        This function returns the transformed image and ground truth for a given index, plus the index itself.

        :param index: The index of the sample that should be returned.
        :type index: int
        :return: The image and the ground truth for the given index with the index.
        :rtype: tuple
        """
        data_img, gt_img = self._load_data_and_gt(index=index)
        img, gt = self._apply_transformation(data_img, gt_img)
        return img, gt, index

    def _load_data_and_gt(self, index: int) -> Tuple[Image.Image, Image.Image]:
        """
        This function loads the image and the ground truth for a given index.

        :param index: The index of the sample that should be returned.
        :type index: int
        :return: The image and the ground truth for the given index.
        :rtype: tuple
        :raises AssertionError: If the image or the ground truth does not have the size given by ``image_dims``.
        """
        img_path, gt_path = self.img_gt_path_list[index]
        data_img = pil_loader(str(img_path))
        gt_img = pil_loader(str(gt_path))

        expected_height, expected_width = self.image_dims.height, self.image_dims.width
        assert data_img.height == expected_height and data_img.width == expected_width, \
            f'Image "{img_path}" does not have the expected dimensions'
        assert gt_img.height == expected_height and gt_img.width == expected_width, \
            f'Ground truth "{gt_path}" does not have the expected dimensions'

        return data_img, gt_img

    def _apply_transformation(self, img: Image.Image, gt: Image.Image) -> Tuple[Tensor, Tensor]:
        """
        Applies the transformations that have been defined in the constructor.

        The order is: ``twin_transform`` (skipped for the test dataset), ``image_transform``,
        conversion to tensors, ``target_transform``. Image and ground truth are converted with
        ``ToTensor`` whenever they are not tensors yet, so tensors are returned even if no
        transformation has been defined (unless ``target_transform`` returns something else).

        :param img: The original image onto which the transformations should be applied.
        :type img: Image.Image
        :param gt: The ground truth onto which the transformations should be applied.
        :type gt: Image.Image
        :return: The transformed image and ground truth.
        :rtype: tuple
        """
        if self.twin_transform is not None and not self.is_test:
            img, gt = self.twin_transform(img, gt)

        if self.image_transform is not None:
            # perform transformations
            img, gt = self.image_transform(img, gt)

        if not is_tensor(img):
            img = ToTensor()(img)
        if not is_tensor(gt):
            gt = ToTensor()(gt)

        if self.target_transform is not None:
            img, gt = self.target_transform(img, gt)

        return img, gt

    @staticmethod
    def _split_name_pattern(name_pattern: str) -> Tuple[str, int, str]:
        """
        Splits a file name pattern at its ``#`` placeholder.

        :param name_pattern: The file name pattern, e.g. ``img_####.png``.
        :type name_pattern: str
        :return: The text before the placeholder, the length of the placeholder and the text after it.
        :rtype: Tuple[str, int, str]
        :raises AssertionError: If the pattern does not contain exactly one placeholder group.
        """
        placeholders = list(re.finditer('#+', name_pattern))
        # assert that there is exactly one placeholder group
        assert len(placeholders) == 1, \
            f'Expected exactly one group of "#" in "{name_pattern}", found {len(placeholders)}'

        placeholder = placeholders[0]
        prefix = name_pattern[:placeholder.start()]
        suffix = name_pattern[placeholder.end():]
        return prefix, len(placeholder.group(0)), suffix

    @staticmethod
    def _get_paths_from_specs(data_root: str, doc_dir: str, doc_names: str, gt_dir: str, gt_names: str,
                              range_from: int, range_to: int) -> List[Tuple[Path, Path]]:
        """
        This function returns a list of tuples that contain the path to the image and gt that belong together.

        File numbers in the range for which neither the image nor the ground truth exists are skipped.

        :param data_root: The root where the data is located.
        :type data_root: str
        :param doc_dir: The directory where the images are located (relative to ``data_root``).
        :type doc_dir: str
        :param doc_names: The name pattern of the images (exactly one group of ``#`` as number placeholder).
        :type doc_names: str
        :param gt_dir: The directory where the ground truth is located (relative to ``data_root``).
        :type gt_dir: str
        :param gt_names: The name pattern of the ground truth (exactly one group of ``#`` as number placeholder).
        :type gt_names: str
        :param range_from: The first index of the range that should be used (inclusive).
        :type range_from: int
        :param range_to: The last index of the range that should be used (inclusive).
        :type range_to: int
        :return: A list of ``(image path, ground truth path)`` tuples.
        :rtype: List[Tuple[Path, Path]]
        :raises AssertionError: If a name pattern is malformed, if only one file of a pair exists,
            or if no pair was found at all.
        """
        path_root = Path(data_root)
        path_doc_dir = path_root / doc_dir
        path_gt_dir = path_root / gt_dir

        # A missing directory is only logged here; it surfaces as an AssertionError further down.
        if not path_doc_dir.is_dir():
            log.error(f'Document directory not found ("{path_doc_dir}")!')

        if not path_gt_dir.is_dir():
            log.error(f'Ground Truth directory not found ("{path_gt_dir}")!')

        split_name_pattern = DatasetRolfFormat._split_name_pattern
        doc_prefix, doc_number_length, doc_suffix = split_name_pattern(doc_names)
        gt_prefix, gt_number_length, gt_suffix = split_name_pattern(gt_names)

        paths = []
        for i in range(range_from, range_to + 1):
            # The placeholder length is a minimum width: longer numbers are not truncated.
            path_doc_file = path_doc_dir / f'{doc_prefix}{i:0{doc_number_length}d}{doc_suffix}'
            path_gt_file = path_gt_dir / f'{gt_prefix}{i:0{gt_number_length}d}{gt_suffix}'

            doc_exists = path_doc_file.exists()
            gt_exists = path_gt_file.exists()
            # Either both files of a pair exist or none of them
            assert doc_exists == gt_exists, \
                f'Image and ground truth do not match for file number {i}: ' \
                f'"{path_doc_file}" exists: {doc_exists}, "{path_gt_file}" exists: {gt_exists}'

            if doc_exists and gt_exists:
                paths.append((path_doc_file, path_gt_file))

        assert len(paths) > 0, f'No image/ground truth pair found in "{path_root}"'

        return paths

    @staticmethod
    def get_img_gt_path_list(list_specs: List[DatasetSpecs]) -> List[Tuple[Path, Path]]:
        """
        Returns a list of tuples that contain the path to the image and gt that belong together.

        The pairs of all specs are concatenated in the order of ``list_specs``.

        :param list_specs: The dataset specs that specify the location of the data and ground truth files.
        :type list_specs: List[DatasetSpecs]
        :return: A list of ``(image path, ground truth path)`` tuples.
        :rtype: List[Tuple[Path, Path]]
        """
        paths = []
        for specs in list_specs:
            # The fields of DatasetSpecs match the parameters of _get_paths_from_specs
            paths.extend(DatasetRolfFormat._get_paths_from_specs(**asdict(specs)))

        return paths