# Source code for tensorbay.opendataset.CCPD.loader

#!/usr/bin/env python3
#
# Copyright 2021 Graviti. Licensed under MIT License.
#
# pylint: disable=invalid-name

"""Dataloader of CCPD dataset."""

import os
from typing import Dict, Iterator, List, Union

from tensorbay.dataset import Data, Dataset
from tensorbay.label import LabeledPolygon
from tensorbay.opendataset._utility.glob import glob

# Lookup table for the first index of a license plate field: `_get_license_plate`
# uses that index to pick one of these 33 single-character entries.
# NOTE(review): the table in the upstream CCPD README appears to end with an extra "O"
# placeholder entry -- confirm whether that index can occur in file names.
_PROVINCES = tuple("皖沪津渝冀晋蒙辽吉黑苏浙京闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新警学")

# Lookup table for the second index of a license plate field (see `_get_license_plate`):
# the upper-case letters without "I" and "O", 24 entries in total.
# NOTE(review): the table in the upstream CCPD README appears to end with an extra "O"
# placeholder entry -- confirm whether that index can occur in file names.
_CODES = tuple("ABCDEFGHJKLMNPQRSTUVWXYZ")

# Lookup table for the third and all later indexes of a license plate field (see
# `_get_license_plate`): the 24 letters without "I" and "O", followed by the digits 0-9.
# NOTE(review): the table in the upstream CCPD README appears to end with an extra "O"
# placeholder entry -- confirm whether that index can occur in file names.
_ADS = tuple("ABCDEFGHJKLMNPQRSTUVWXYZ0123456789")

# Segment layout of the CCPD dataset, keyed by segment "head" with a tuple of "tails".
# `CCPD` creates one segment named "<head>-<tail>" for every pair listed here.
# `_get_ccpd_image_path` reads the images of a "base" tail from "splits/<head>.txt",
# and globs "ccpd_<tail>/*.jpg" for every other tail.
_CCPD_SEGMENTS = {
    "other": ("np", "weather"),
    "train": ("base",),
    "val": ("base",),
    "test": ("blur", "challenge", "db", "fn", "rotate", "tilt"),
}

# Segment names of CCPDGreen; `CCPDGreen` also uses each name as the sub-directory of
# "ccpd_green" that is searched for "*.jpg" files.
_CCPDGREEN_SEGMENTS = ("train", "val", "test")

# Names handed to the `Dataset` constructor by the `CCPD` and `CCPDGreen` loaders.
DATASET_NAME_CCPD = "CCPD"
DATASET_NAME_CCPDGREEN = "CCPDGreen"


def CCPD(path: str) -> Dataset:
    """`CCPD <https://github.com/detectRecog/CCPD>`_ dataset.

    The file structure should be like::

        <path>
            ccpd_np/
                1005.jpg
                1019.jpg
                ...
            ccpd_base/
                00205459770115-90_85-352&516_448&547- \
                444&547_368&549_364&517_440&515-0_0_22_10_26_29_24-128-7.jpg
                00221264367816-91_91-283&519_381&553- \
                375&551_280&552_285&514_380&513-0_0_7_26_17_33_29-95-9.jpg
                ...
            ccpd_blur/
            ccpd_challenge/
            ccpd_db/
            ccpd_fn/
            ccpd_rotate/
            ccpd_tilt/
            ccpd_weather/
            LICENSE
            README.md
            splits/
                ccpd_blur.txt
                ccpd_challenge.txt
                ccpd_db.txt
                ccpd_fn.txt
                ccpd_rotate.txt
                ccpd_tilt.txt
                test.txt
                train.txt
                val.txt

    One segment named ``<head>-<tail>`` is created for every pair in ``_CCPD_SEGMENTS``.
    Every image gets the polygon label decoded from its file name, except in the
    "other-np" segment, where the polygon label is an empty list.

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    root_path = os.path.abspath(os.path.expanduser(path))
    catalog_path = os.path.join(os.path.dirname(__file__), "catalog.json")

    dataset = Dataset(DATASET_NAME_CCPD)
    dataset.load_catalog(catalog_path)

    for head, tails in _CCPD_SEGMENTS.items():
        for tail in tails:
            name = f"{head}-{tail}"
            segment = dataset.create_segment(name)
            # Files under "ccpd_np" are named like "1005.jpg", so there is nothing to decode.
            is_annotated = name != "other-np"
            for image_path in _get_ccpd_image_path(root_path, head, tail):
                data = Data(image_path)
                data.label.polygon = _get_polygons(image_path) if is_annotated else []
                segment.append(data)

    return dataset
def CCPDGreen(path: str) -> Dataset:
    """`CCPDGreen <https://github.com/detectRecog/CCPD>`_ dataset.

    The file structure should be like::

        <path>
            ccpd_green/
                train/
                test/
                val/

    One segment is created per entry of ``_CCPDGREEN_SEGMENTS``; it holds every
    ``*.jpg`` file of the sub-directory with the same name, each labeled with the
    polygon decoded from its file name.

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    dataset_root = os.path.abspath(os.path.expanduser(path))
    root_path = os.path.join(dataset_root, "ccpd_green")
    catalog_path = os.path.join(os.path.dirname(__file__), "catalog.json")

    dataset = Dataset(DATASET_NAME_CCPDGREEN)
    dataset.load_catalog(catalog_path)

    for name in _CCPDGREEN_SEGMENTS:
        segment = dataset.create_segment(name)
        pattern = os.path.join(root_path, name, "*.jpg")
        for image_path in glob(pattern):
            data = Data(image_path)
            data.label.polygon = _get_polygons(image_path)
            segment.append(data)

    return dataset
def _get_license_plate(license_index: str) -> str:
    """Decode the license plate field of a CCPD file name into the plate text.

    Arguments:
        license_index: Integer indexes joined by "_", such as ``"0_0_22_10_26_29_24"``.
            The first index selects from ``_PROVINCES``, the second from ``_CODES`` and
            every remaining one from ``_ADS``.

    Returns:
        The plate text built from the selected characters.

    Raises:
        ValueError: When a part is not an integer or fewer than two indexes are given.
        IndexError: When an index is outside of its lookup table.

    """
    # Unpack eagerly instead of calling next() on a lazy map: a field that is too short
    # then fails with ValueError rather than leaking StopIteration to the caller.
    province, code, *ads = map(int, license_index.split("_"))
    suffix = "".join(_ADS[index] for index in ads)
    return f"{_PROVINCES[province]}{_CODES[code]}{suffix}"


def _get_polygons(image_path: str) -> List[LabeledPolygon]:
    """Build the polygon label encoded in the file name of a CCPD image.

    The file name without its extension is split on "-" into at most seven fields.
    Field 1 holds the horizontal and vertical tilt joined by "_", field 3 holds the
    polygon vertices written as "x&y" and joined by "_", field 4 holds the license plate
    indexes, field 5 the brightness and field 6 the blurriness. Fields 0 and 2 are not
    read here.

    Arguments:
        image_path: The path of the image; only its base name is parsed.

    Returns:
        A list with a single :class:`LabeledPolygon` carrying the attributes
        "horizontal_tilt", "vertical_tilt", "license_plate", "brightness" and
        "blurriness".

    Raises:
        IndexError: When the file name has fewer "-"-separated fields than needed.
        ValueError: When a numeric field cannot be converted to an integer.

    """
    annotations = os.path.splitext(os.path.basename(image_path))[0].split("-", 6)

    # Lazy on purpose, as before: the coordinates are converted when the polygon consumes them.
    points = (map(int, point.split("&")) for point in annotations[3].split("_", 3))

    tilt_degree = annotations[1].split("_", 1)
    attributes: Dict[str, Union[int, str]] = {
        "horizontal_tilt": int(tilt_degree[0]),
        "vertical_tilt": int(tilt_degree[1]),
        "license_plate": _get_license_plate(annotations[4]),
        "brightness": int(annotations[5]),
        "blurriness": int(annotations[6]),
    }
    return [LabeledPolygon(points, attributes=attributes)]


def _get_ccpd_image_path(root_path: str, segment_head: str, segment_tail: str) -> Iterator[str]:
    """Yield the image paths that belong to one CCPD segment.

    For the "base" tail the paths are read from ``splits/<segment_head>.txt``, one path
    relative to ``root_path`` per line; blank lines are skipped. For every other tail the
    ``*.jpg`` files of the ``ccpd_<segment_tail>`` directory are yielded.

    Arguments:
        root_path: The root directory of the dataset.
        segment_head: The first part of the segment name, which names the split file.
        segment_tail: The second part of the segment name, which names the image directory.

    Yields:
        The path of each image in the segment.

    """
    if segment_tail == "base":
        file_path = os.path.join(root_path, "splits", f"{segment_head}.txt")
        with open(file_path, encoding="utf-8") as fp:
            for line in fp:
                relative_path = line.strip()
                # A blank line (for example a trailing one) would otherwise be joined into
                # the root directory itself and break the file name parsing downstream.
                if relative_path:
                    yield os.path.join(root_path, relative_path)
    else:
        yield from glob(os.path.join(root_path, f"ccpd_{segment_tail}", "*.jpg"))