Source code for torchflare.datasets.image_data

import glob
import pathlib
from typing import Callable, List, Optional, Union

import albumentations as A
import pandas as pd
import torch

from torchflare.datasets.core_utils import (
    apply_image_augmentations,
    get_iloc_cols,
    join_paths,
    open_image,
    to_tensor,
)
from torchflare.datasets.data_core import ItemReader


class ImageDataset(ItemReader):
    """Dataset for image classification.

    Items are image file paths; each one is opened with ``open_image``,
    optionally augmented, and converted with ``to_tensor``. Use one of the
    alternate constructors (``from_df``, ``from_csv``, ``from_folders``) to
    build the list of image paths.
    """

    def __init__(self, convert_mode: str, *args, **kwargs):
        """Store the image convert mode and delegate the rest to ``ItemReader``.

        Args:
            convert_mode: The mode to be passed to PIL.Image.convert.
            *args: Positional arguments forwarded unchanged to ``ItemReader``.
            **kwargs: Keyword arguments forwarded unchanged to ``ItemReader``.
        """
        super().__init__(*args, **kwargs)
        self.convert_mode = convert_mode

    def apply_input_transforms(self, transforms: A.Compose, item) -> torch.Tensor:
        """Open one image, optionally augment it, and convert it to a tensor.

        Args:
            transforms: The augmentations to apply, or ``None`` to skip
                augmentation.
            item: The item handed to ``open_image`` (an image file path when
                the dataset is built through the classmethods of this class).

        Returns:
            The result of ``to_tensor`` on the (possibly augmented) image.
        """
        image = open_image(x=item, convert_mode=self.convert_mode)
        if transforms is not None:
            image = apply_image_augmentations(image, transforms)
        return to_tensor(image)

    def apply_target_transforms(self, transforms: Union[A.Compose, Callable], item) -> torch.Tensor:
        """Optionally transform one target and convert it to a tensor.

        Args:
            transforms: A callable applied to the target, or ``None`` to use
                the target unchanged.
            item: The raw target value.

        Returns:
            The result of ``to_tensor`` on the (possibly transformed) target.
        """
        if transforms is not None:
            item = transforms(item)
        return to_tensor(item)

    # skipcq : PYL-W0221
    @classmethod
    def from_df(
        cls,
        df: pd.DataFrame,
        path: Union[str, pathlib.Path],
        input_columns: List[str],
        transforms: Optional[A.Compose] = None,
        convert_mode: str = "RGB",
        extension: Optional[str] = None,
        **kwargs
    ):
        """Classmethod to read inputs from the given dataframe.

        Args:
            df: The dataframe containing the image name/ids, and the targets.
            path: The path where images are saved.
            input_columns: A list containing name/names of the
                image columns containing the image name/ids.
            transforms: The augmentations to be used on images.
            convert_mode: The mode to be passed to PIL.Image.convert.
            extension: The image file extension.
            **kwargs: Extra keyword arguments forwarded to the constructor.

        Returns:
            A new dataset instance whose items are the joined image paths.

        Example:

            .. code-block:: python

                import albumentations as A

                from torchflare.datasets import ImageDataset

                ds = ImageDataset.from_df(
                    df=df,
                    path="train/images",
                    input_columns=["image_ids"],
                    transforms=A.Compose([A.Resize(256, 256)]),
                ).targets_from_df(target_columns=["targets"])
        """
        path = pathlib.Path(path)
        files = get_iloc_cols(df, input_columns)
        files = join_paths(path=path, files=files, extension=extension)
        return cls(
            items=files,
            transforms=transforms,
            df=df,
            path=path,
            convert_mode=convert_mode,
            **kwargs
        )

    # skipcq : PYL-W0221
    @classmethod
    def from_folders(
        cls,
        path: Union[str, pathlib.Path],
        transforms: Optional[A.Compose] = None,
        convert_mode: str = "RGB",
        **kwargs
    ):
        """Classmethod to create pytorch dataset from folders.

        Every entry matching ``<path>/*/*`` (one sub-directory level below
        ``path``) becomes an item of the dataset.

        Args:
            path: The path where images are stored.
            transforms: The transforms to be applied to images.
            convert_mode: The mode to be passed to PIL.Image.convert.
            **kwargs: Extra keyword arguments forwarded to the constructor.

        Returns:
            A new dataset instance whose items are the globbed file paths.

        Note:
            Augmentations must be Compose objects from albumentations.

            The training directory structure should be as follows:
                train/class_1/xxx.jpg
                .
                .
                train/class_n/xxz.jpg
            The test directory structure should be as follows:
                test_dir/xxx.jpg
                test_dir/xyz.jpg
                test_dir/ppp.jpg

        Example:

            .. code-block:: python

                from torchflare.datasets import ImageDataset

                import albumentations as A

                ds = ImageDataset.from_folders(
                    path="/train/images",
                    transforms=A.Compose([A.Resize(256, 256)]),
                    convert_mode="RGB"
                ).targets_from_folders(target_path="/train/images")
        """
        # ``path`` may be a pathlib.Path, which does not support ``+`` with a
        # str; convert explicitly so both advertised types work. For a str
        # input the resulting pattern is identical to plain concatenation.
        pattern = str(path) + "/*/*"
        files = glob.glob(pattern)
        return cls(
            items=files, path=path, transforms=transforms, convert_mode=convert_mode, **kwargs
        )

    # skipcq : PYL-W0221
    @classmethod
    def from_csv(
        cls,
        csv_path: Union[str, pathlib.Path],
        path: Union[str, pathlib.Path],
        input_columns: List[str],
        transforms: Optional[A.Compose] = None,
        convert_mode: str = "RGB",
        extension: Optional[str] = None,
        **kwargs
    ):
        """Classmethod to read inputs from the given csv.

        The csv is loaded with ``pandas.read_csv`` and the resulting dataframe
        is handed to ``from_df``.

        Args:
            csv_path: Full path to the csv file.
            path: The path where images are saved.
            input_columns: A list containing names of the
                image columns containing the image name/ids.
            transforms: The augmentations to be used on images.
            convert_mode: The mode to be passed to PIL.Image.convert.
            extension: The image file extension.
            **kwargs: Extra keyword arguments forwarded to ``from_df``.

        Returns:
            A new dataset instance built by ``from_df``.

        Example:

            .. code-block:: python

                from torchflare.datasets import ImageDataset
                import albumentations as A

                ds = ImageDataset.from_csv(
                    csv_path="train/train.csv",
                    path="train/images",
                    input_columns=["image_ids"],
                    transforms=A.Compose([A.Resize(256, 256)]),
                ).targets_from_df(target_columns=["targets"])
        """
        df = pd.read_csv(csv_path)
        return cls.from_df(
            df=df,
            transforms=transforms,
            input_columns=input_columns,
            convert_mode=convert_mode,
            extension=extension,
            path=path,
            # Forward extra options instead of silently discarding them, as
            # the sibling constructors do.
            **kwargs
        )


# Names exported by a star-import of this module.
__all__ = ["ImageDataset"]