PaddlePaddle
This topic describes how to integrate TensorBay dataset with PaddlePaddle Pipeline using the DogsVsCats Dataset as an example.
The typical way to integrate a TensorBay dataset with PaddlePaddle is to build a "Segment" class derived from paddle.io.Dataset.
from paddle.io import DataLoader, Dataset
from paddle.vision import transforms
from PIL import Image
from tensorbay import GAS
from tensorbay.dataset import Dataset as TensorBayDataset
class DogsVsCatsSegment(Dataset):
    """Expose one segment of the TensorBay "DogsVsCats" dataset as a
    ``paddle.io.Dataset``.

    Arguments:
        gas: An authenticated TensorBay ``GAS`` client.
        segment_name: Name of the segment to wrap (e.g. ``"train"``).
        transform: Callable applied to each ``PIL.Image``; expected to
            return a tensor suitable for the dataloader.
    """

    def __init__(self, gas, segment_name, transform):
        super().__init__()
        self.dataset = TensorBayDataset("DogsVsCats", gas)
        self.segment = self.dataset[segment_name]
        # Maps a classification category name to its integer label index.
        self.category_to_index = self.dataset.catalog.classification.get_category_to_index()
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the wrapped segment."""
        return len(self.segment)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_index)`` for the sample at ``idx``."""
        sample = self.segment[idx]
        with sample.open() as image_file:
            image_tensor = self.transform(Image.open(image_file))
        label_index = self.category_to_index[sample.label.classification.category]
        return image_tensor, label_index
Use the following code to create a PaddlePaddle dataloader and run it:
ACCESS_KEY = "Accesskey-*****"

# Per-image preprocessing: PIL image -> tensor -> normalized tensor.
my_transforms = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=[0.485], std=[0.229])]
)

# Wrap the "train" segment and feed it to a standard PaddlePaddle DataLoader.
train_segment = DogsVsCatsSegment(GAS(ACCESS_KEY), segment_name="train", transform=my_transforms)
train_dataloader = DataLoader(train_segment, batch_size=4, shuffle=True, num_workers=0)

# Iterate one epoch, printing the label batch for each mini-batch.
for index, (image, label) in enumerate(train_dataloader):
    print(f"{index}: {label}")