Source code for tensorbay.opendataset.RP2K.loader
#!/usr/bin/env python3
#
# Copyright 2021 Graviti. Licensed under MIT License.
#
# pylint: disable=invalid-name
"""Dataloader of RP2K dataset."""
import os
from glob import glob
from typing import Iterable, List
from tensorbay.dataset import Data, Dataset
from tensorbay.label import Classification
# Name passed to ``Dataset`` when the RP2K loader builds the dataset.
DATASET_NAME = "RP2K"
def RP2K(path: str) -> Dataset:
    """Build the `RP2K <https://www.pinlandata.com/rp2k_dataset>`_ dataset.

    The loader expects the following directory layout::

        <path>
            all/
                test/
                    <category>/
                        <image_name>.jpg
                        ...
                    ...
                train/
                    <category>/
                        <image_name>.jpg
                        ...
                    ...

    One segment is created per split ("train", then "test"). Every ``*.jpg``
    and ``*.png`` file found directly inside a category folder becomes one
    data item whose classification category is the folder name.

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    dataset_root = os.path.join(os.path.abspath(os.path.expanduser(path)), "all")
    catalog_file = os.path.join(os.path.dirname(__file__), "catalog.json")

    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(catalog_file)

    for split in ("train", "test"):
        segment = dataset.create_segment(split)
        split_dir = os.path.join(dataset_root, split)

        # Walk the category folders in sorted order so the data order is reproducible.
        for label in sorted(os.listdir(split_dir)):
            label_dir = os.path.join(split_dir, label)
            if os.path.isdir(label_dir):
                for local_file in _glob(label_dir, ("*.jpg", "*.png")):
                    # The remote path is the bare file name with spaces replaced by "_".
                    remote_name = os.path.basename(local_file).replace(" ", "_")
                    item = Data(local_path=local_file, target_remote_path=remote_name)
                    item.label.classification = Classification(label)
                    segment.append(item)

    return dataset
def _glob(category_dir: str, patterns: Iterable[str]) -> List[str]:
file_paths = []
for pattern in patterns:
file_paths.extend(glob(os.path.join(category_dir, pattern)))
file_paths.sort()
return file_paths