Source code for tensorbay.opendataset.COVIDChestXRay.loader
#!/usr/bin/env python3
#
# Copyright 2021 Graviti. Licensed under MIT License.
#
# pylint: disable=invalid-name
"""Dataloader of COVID-chestxray dataset."""
import csv
import os
from typing import Any, Dict
from tensorbay.dataset import Data, Dataset
from tensorbay.label import Classification
DATASET_NAME = "COVID-chestxray"
_INTEGER_GROUP = {"offset", "age"}
_FLOAT_GROUP = {
"temperature",
"pO2_saturation",
"leukocyte_count",
"neutrophil_count",
"lymphocyte_count",
}
[docs]def COVIDChestXRay(path: str) -> Dataset:
"""`COVID-chestxray <https://github.com/ieee8023/covid-chestxray-dataset>`_ dataset.
The file structure should be like::
<path>
images/
0a7faa2a.jpg
000001-2.png
000001-3.jpg
1B734A89-A1BF-49A8-A1D3-66FAFA4FAC5D.jpeg
...
volumes/
coronacases_org_001.nii.gz
....
metadata.csv
...
Arguments:
path: The root directory of the dataset.
Returns:
Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.
"""
root_path = os.path.abspath(os.path.expanduser(path))
dataset = Dataset(DATASET_NAME)
dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
segment = dataset.create_segment()
csv_path = os.path.join(root_path, "metadata.csv")
with open(csv_path, encoding="utf-8") as fp:
csv_reader = csv.DictReader(fp)
for attributes in csv_reader:
folder = attributes.pop("folder")
# The 20 images invovled in "volumes" folder currently are invalid to download.
if folder == "volumes":
continue
image_path = os.path.join(root_path, folder, attributes.pop("filename"))
category = attributes.pop("finding").strip()
data = Data(image_path)
data.label.classification = Classification(
category=category, attributes=_convert_type(attributes)
)
segment.append(data)
return dataset
def _convert_type(attributes: Dict[str, Any]) -> Dict[str, Any]:
for key, value in attributes.items():
if value == "":
attributes[key] = None
elif key in _INTEGER_GROUP:
attributes[key] = int(value)
elif key in _FLOAT_GROUP:
attributes[key] = float(value)
return attributes