Getting started with MNIST
MNIST is something like the hello-world dataset of the deep learning community. In this example we will look at how to get real data into the pipeline and how to sample it. The full code for the example is available here; we will just comment on the core parts.
Sampling MNIST data
import os
import struct
from itertools import count
import cv2
import numpy as np
from pixelpipes.graph import RandomSeed, Constant
from pixelpipes.list import GetRandom
from pixelpipes.image import ConvertDepth
from pixelpipes.utilities import pipeline
# Magic numbers that open the two MNIST IDX files (big-endian, first 4 bytes).
_IDX_IMAGES_MAGIC = 0x00000803
_IDX_LABELS_MAGIC = 0x00000801


def _read_idx_images(path):
    """Load every image stored in an uncompressed IDX3 unsigned-byte file.

    Args:
        path: Location of the ``*-images.idx3-ubyte`` file.

    Returns:
        A list of ``(height, width)`` ``uint8`` arrays, one per image.

    Raises:
        ValueError: If the magic number is unexpected or the file is truncated.
    """
    with open(path, mode="rb") as handle:
        header = handle.read(16)
        if len(header) != 16:
            raise ValueError("{}: truncated IDX image header".format(path))
        magic, total, height, width = struct.unpack(">IIII", header)
        if magic != _IDX_IMAGES_MAGIC:
            raise ValueError(
                "{}: unexpected magic number {:#010x} for an IDX image file "
                "(is the file still compressed?)".format(path, magic)
            )

        size = height * width
        images = []
        for _ in range(total):
            chunk = handle.read(size)
            if len(chunk) != size:
                raise ValueError("{}: truncated image data".format(path))
            images.append(np.frombuffer(chunk, dtype=np.uint8).reshape(height, width))
    return images


def _read_idx_labels(path):
    """Load every label stored in an uncompressed IDX1 unsigned-byte file.

    Args:
        path: Location of the ``*-labels.idx1-ubyte`` file.

    Returns:
        A list of Python integers, one per label.

    Raises:
        ValueError: If the magic number is unexpected or the file is truncated.
    """
    with open(path, mode="rb") as handle:
        header = handle.read(8)
        if len(header) != 8:
            raise ValueError("{}: truncated IDX label header".format(path))
        magic, total = struct.unpack(">II", header)
        if magic != _IDX_LABELS_MAGIC:
            raise ValueError(
                "{}: unexpected magic number {:#010x} for an IDX label file "
                "(is the file still compressed?)".format(path, magic)
            )
        # One read for the whole payload instead of one read per label.
        payload = handle.read(total)

    if len(payload) != total:
        raise ValueError("{}: truncated label data".format(path))
    # Iterating over bytes yields the unsigned byte values as ints.
    return list(payload)


@pipeline()
def mnist():
    """Build a pipeline that samples random (image, label) pairs from MNIST.

    The training images and labels are read from the ``_data/mnist``
    directory located next to this file and embedded into the graph as
    constant lists.

    Returns:
        A pair of graph nodes: a randomly selected image passed through
        ``ConvertDepth(..., "Float")`` and the label selected with the same
        random seed.

    Raises:
        ValueError: If either file is malformed or the number of images
            differs from the number of labels.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "_data", "mnist")
    images = _read_idx_images(os.path.join(data_dir, "train-images.idx3-ubyte"))
    labels = _read_idx_labels(os.path.join(data_dir, "train-labels.idx1-ubyte"))

    # Explicit check rather than ``assert`` so it also runs under ``python -O``.
    if len(images) != len(labels):
        raise ValueError(
            "MNIST image/label count mismatch: {} images, {} labels".format(
                len(images), len(labels)
            )
        )

    # Pipeline starts here
    label_list = Constant(labels)
    image_list = Constant(images)
    # Both label and image should be sampled the same way, so the same random
    # seed node is bound to both GetRandom operations.
    seed = RandomSeed()
    return ConvertDepth(GetRandom(image_list, seed=seed), "Float"), GetRandom(label_list, seed=seed)
if __name__ == "__main__":
    # The original train/test archives can be downloaded from
    # http://yann.lecun.com/exdb/mnist/ ; unzip them and make sure the paths
    # used by mnist() point to the extracted files.
    samples = mnist()
    for picture, digit in samples:
        print(digit)
        # Scale by 255 and convert to 8-bit before handing the image to OpenCV.
        preview = (picture * 255).astype(np.uint8)
        cv2.imshow("Patch", preview)
        pressed = cv2.waitKey()
        if pressed == 32:  # 32 is the space key: show the next sample
            continue
        # Any other key ends the preview loop.
        break