智力活动是一种生活态度 https://mountaye.github.io/blog/

.py | What a PyTorch machine learning project looks like

Aug 18, 2022

These are my notes on a PyTorch tutorial from the official website. The original text first writes everything in plain Python, staying as close to first principles as possible, and then refactors it step by step into code that resembles a production environment. Here I reverse the order and present the final, refactored result first. (Do the engineers at Matters really plan to implement code highlighting~)

Self-study — indeed all learning and teaching — is essentially about building a road between the knowledge one has already mastered and the unknown target knowledge. There are two ways to build that road. One is the theoretical, or first-principles, route: start from self-evident axioms or knowledge already mastered, and derive new knowledge step by step through logical reasoning. The other is the practical, or engineering, route: take a working product, divide it into subsystems, and observe how the output changes as the input changes, until each subsystem is simple enough to be understood on its own and is no longer a black box, and in this way come to understand how the whole system works.

However, once the object of study is complex enough, it is often difficult to get all the way through with only one of the two methods, relying on one's self-learning ability alone; or the roads built by the two methods fail to meet. For machine learning, the theoretical route says: "pass the input data through a function with many parameters, and adjust the parameters according to the difference between the function's return value and the output data, until the function approximates the relationship between the input data and the output data". In practice, however, the code usually relies on functions that library authors have wrapped in many layers of encapsulation, and reading the source code alone often leaves one confused.

So I was overjoyed to find the tutorial WHAT IS TORCH.NN REALLY?: https://pytorch.org/tutorials/beginner/nn_tutorial.html on the official PyTorch website. It gives the code written along both routes, so that an autodidact can compare the two side by side, like the texts on the Rosetta Stone. Here I have removed the CNN-related parts: after all, CNNs are only a subset of deep learning, and deep learning is only a subset of machine learning, so they have little to do with the topic of this article.

The original text first writes everything in plain Python from first principles, and then refactors it step by step into code close to a production environment. Here I reverse the order and present the final, refactored result first:

 from pathlib import Path
import requests
import pickle
import gzip
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset,DataLoader

# Using GPU

# Report whether CUDA is usable, then select the device that the model and
# every batch are moved to: the GPU when one is available, otherwise the CPU.
print(torch.cuda.is_available())
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Wrapping DataLoader
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html?highlight=dataloader
# https://pytorch.org/tutorials/beginner/data_loading_tutorial.html?highlight=dataloader

def preprocess(x, y):
    """Reshape a batch of inputs to image layout and move the batch to ``dev``.

    Args:
        x: Input tensor; it is viewed as ``(-1, 1, 28, 28)``, so its element
            count must be a multiple of 784.
        y: Target tensor belonging to the same batch.

    Returns:
        A ``(x, y)`` tuple with both tensors on the module-level device ``dev``.
    """
    images = x.view(-1, 1, 28, 28)
    return images.to(dev), y.to(dev)

def get_data(train_ds, valid_ds, bs):
    """Build the training and validation ``DataLoader`` pair.

    Args:
        train_ds: Dataset used for training; it is reshuffled on every pass.
        valid_ds: Dataset used for validation; it is read in order.
        bs: Batch size for training. Validation uses ``bs * 2``.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    # NOTE(review): the doubled batch size is presumably affordable because
    # validation does not run a backward pass -- confirm against the tutorial.
    valid_loader = DataLoader(valid_ds, batch_size=bs * 2)
    return train_loader, valid_loader

class WrappedDataLoader:
    """Wrap a data loader and apply a function to every batch it yields.

    Args:
        dl: Any sized iterable of batches, e.g. a ``DataLoader``. Each batch
            must be unpackable into the positional arguments of ``func``.
        func: Callable invoked as ``func(*batch)``; its return value is what
            iteration yields in place of the raw batch.
    """

    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        """Return the number of batches of the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield ``func(*batch)`` for each batch of the wrapped loader."""
        for batch in self.dl:
            yield self.func(*batch)

# Define the neural network model to be trained

# # If the model is simple:
# model = nn.Sequential(nn.Linear(784, 10))

# Generally the model is a class that inherits from nn.Module and implements forward().
class Mnist_Logistic(nn.Module):
    """Logistic-regression model: one linear layer from 784 inputs to 10 outputs."""

    def __init__(self):
        super().__init__()
        # nn.Linear replaces the hand-written parameters of the first-principles version:
        #   self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        #   self.bias = nn.Parameter(torch.zeros(10))
        self.lin = nn.Linear(784, 10)

    def forward(self, xb):
        """Return the 10 raw output scores for each row of ``xb``.

        Args:
            xb: Batch whose last dimension has 784 features.
        """
        # Replaces the hand-written `xb @ self.weights + self.bias`.
        return self.lin(xb)

# Define the training pipeline in fit()

def loss_batch(model, loss_func, xb, yb, opt=None):
    """Compute the loss for one batch and optionally take one optimizer step.

    Args:
        model: Callable mapping ``xb`` to predictions.
        loss_func: Callable ``loss_func(predictions, yb)`` returning a scalar tensor.
        xb: Batch of inputs.
        yb: Batch of targets.
        opt: Optimizer. When given, the loss is back-propagated, the optimizer
            steps once and its gradients are reset; when ``None`` the batch is
            only evaluated.

    Returns:
        A ``(loss, batch_size)`` tuple: the loss as a Python float and ``len(xb)``.
    """
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        # Reset gradients so the next batch does not accumulate onto these.
        opt.zero_grad()

    return loss.item(), len(xb)

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    """Train ``model`` and print the validation loss after every epoch.

    Args:
        epochs: Number of passes over ``train_dl``.
        model: ``nn.Module`` to train; switched between train and eval mode.
        loss_func: Callable ``loss_func(predictions, targets)``.
        opt: Optimizer used for the training batches.
        train_dl: Iterable of ``(xb, yb)`` training batches.
        valid_dl: Iterable of ``(xb, yb)`` validation batches; it must yield
            at least one batch.

    Returns:
        None. The epoch index and validation loss are printed.
    """
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        # No optimizer is passed here, so validation never updates the model.
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        # Weight each batch loss by its batch size so that a smaller final
        # batch does not skew the average.
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)

# __main()__:

# data
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/master/_static/"
FILENAME = "mnist.pkl.gz"

# Download the archive only when no local copy exists yet.
if not (PATH / FILENAME).exists():
    response = requests.get(URL + FILENAME)
    # Fail loudly instead of caching an HTTP error page as the dataset.
    response.raise_for_status()
    content = response.content
    # write_bytes() closes the file handle; open(...).write(...) left it open.
    (PATH / FILENAME).write_bytes(content)

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load archives obtained from a trusted source.
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")

x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)

# The listing never defined a batch size.
# NOTE(review): 64 is an assumed value -- confirm against the upstream tutorial.
bs = 64
train_dataset = TensorDataset(x_train, y_train)
valid_dataset = TensorDataset(x_valid, y_valid)
train_dataloader, valid_dataloader = get_data(train_dataset, valid_dataset, bs)
train_dataloader = WrappedDataLoader(train_dataloader, preprocess)
valid_dataloader = WrappedDataLoader(valid_dataloader, preprocess)

# hyperparameters/model
learning_rate = 0.1
epochs = 2
loss_function = F.cross_entropy  # loss function
# The CNN model was removed from this article, so train the logistic model.
# preprocess() reshapes batches to (N, 1, 28, 28); nn.Flatten turns them back
# into the (N, 784) rows that Mnist_Logistic expects.
model = nn.Sequential(nn.Flatten(), Mnist_Logistic())
model.to(dev)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# training
fit(epochs, model, loss_function, optimizer, train_dataloader, valid_dataloader)

As you can see, the backbone of a project can be divided into 4 parts:

Prepare data
Define the model
Describe the process
Actual operation

Let's take each part in turn and compare the code written in the two styles.

1. Prepare the data

before refactoring

 DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/master/_static/"
FILENAME = "mnist.pkl.gz"

if not (PATH / FILENAME).exists():content = requests.get(URL + FILENAME).content(PATH / FILENAME).open("wb").write(content)
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1 ")

x_train, y_train, x_valid, y_valid = map(torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape

After refactoring:

 # Wrapping DataLoader
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html?highlight=dataloader
# https://pytorch.org/tutorials/beginner/data_loading_tutorial.html?highlight=dataloader

def preprocess(x, y):
    """Reshape a batch of inputs to image layout and move the batch to ``dev``.

    Args:
        x: Input tensor; it is viewed as ``(-1, 1, 28, 28)``, so its element
            count must be a multiple of 784.
        y: Target tensor belonging to the same batch.

    Returns:
        A ``(x, y)`` tuple with both tensors on the module-level device ``dev``.
    """
    images = x.view(-1, 1, 28, 28)
    return images.to(dev), y.to(dev)

def get_data(train_ds, valid_ds, bs):
    """Build the training and validation ``DataLoader`` pair.

    Args:
        train_ds: Dataset used for training; it is reshuffled on every pass.
        valid_ds: Dataset used for validation; it is read in order.
        bs: Batch size for training. Validation uses ``bs * 2``.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    # NOTE(review): the doubled batch size is presumably affordable because
    # validation does not run a backward pass -- confirm against the tutorial.
    valid_loader = DataLoader(valid_ds, batch_size=bs * 2)
    return train_loader, valid_loader

class WrappedDataLoader:
    """Wrap a data loader and apply a function to every batch it yields.

    Args:
        dl: Any sized iterable of batches, e.g. a ``DataLoader``. Each batch
            must be unpackable into the positional arguments of ``func``.
        func: Callable invoked as ``func(*batch)``; its return value is what
            iteration yields in place of the raw batch.
    """

    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        """Return the number of batches of the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield ``func(*batch)`` for each batch of the wrapped loader."""
        for batch in self.dl:
            yield self.func(*batch)

2. Define the model

before refactoring

 weights = torch.randn(784, 10) / math.sqrt(784)
weights.requires_grad_()
bias = torch.zeros(10, requires_grad=True)

def log_softmax(x):
    """Return the log-softmax of ``x`` along its last dimension.

    Args:
        x: Tensor of raw scores; the last dimension indexes the classes.

    Returns:
        Tensor of the same shape holding ``x - log(sum(exp(x)))`` per row.
    """
    # Subtracting the row maximum leaves the result mathematically unchanged
    # but keeps exp() from overflowing to inf for large scores, which would
    # turn every entry of the row into -inf.
    shifted = x - x.max(-1, keepdim=True).values
    return shifted - shifted.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    """Apply the hand-written linear layer followed by ``log_softmax``.

    Args:
        xb: Batch of inputs that can be matrix-multiplied with ``weights``.

    Returns:
        The log-softmax of ``xb @ weights + bias``.
    """
    scores = xb @ weights + bias
    return log_softmax(scores)

def nll(input, target):
    """Return the mean negative log-likelihood of the target classes.

    Args:
        input: Tensor indexed as ``input[row, class]``.
        target: Integer tensor with one class index per row.

    Returns:
        Scalar tensor: minus the mean of the entries selected by ``target``.
    """
    row_ids = range(target.shape[0])
    # Pick, for every row, the entry belonging to that row's target class.
    picked = input[row_ids, target]
    return -picked.mean()


loss_func = nll

def accuracy(out, yb):
    """Return the fraction of rows whose highest-scoring class equals the target.

    Args:
        out: Tensor of scores, one row per sample and one column per class.
        yb: Tensor of target class indices, one per row of ``out``.

    Returns:
        Scalar float tensor between 0 and 1.
    """
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

after refactoring

 # If the model is simple:
# A single linear layer (784 inputs, 10 outputs) wrapped in a Sequential
# container; no custom class is needed for a model this simple.
model = nn.Sequential(
    nn.Linear(in_features=784, out_features=10),
)

# Generally the model is a class that inherits from nn.Module and implements forward().
class Mnist_Logistic(nn.Module):
    """Logistic-regression model: one linear layer from 784 inputs to 10 outputs."""

    def __init__(self):
        super().__init__()
        # nn.Linear replaces the hand-written parameters of the first-principles version:
        #   self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        #   self.bias = nn.Parameter(torch.zeros(10))
        self.lin = nn.Linear(784, 10)

    def forward(self, xb):
        """Return the 10 raw output scores for each row of ``xb``.

        Args:
            xb: Batch whose last dimension has 784 features.
        """
        # Replaces the hand-written `xb @ self.weights + self.bias`.
        return self.lin(xb)

3. Describe the process

before refactoring

 lr = 0.5 # learning rate
epochs = 2 # how many epochs to train for

for epoch in range(epochs):for i in range((n - 1) // bs + 1):# set_trace()
        start_i = i * bsend_i = start_i + bsxb = x_train[start_i:end_i]yb = y_train[start_i:end_i]pred = model(xb)loss = loss_func(pred, yb)

        loss.backward()with torch.no_grad():weights -= weights.grad * lrbias -= bias.grad * lrweights.grad.zero_()bias.grad.zero_()

after refactoring

def loss_batch(model, loss_func, xb, yb, opt=None):
    """Compute the loss for one batch and optionally take one optimizer step.

    Args:
        model: Callable mapping ``xb`` to predictions.
        loss_func: Callable ``loss_func(predictions, yb)`` returning a scalar tensor.
        xb: Batch of inputs.
        yb: Batch of targets.
        opt: Optimizer. When given, the loss is back-propagated, the optimizer
            steps once and its gradients are reset; when ``None`` the batch is
            only evaluated.

    Returns:
        A ``(loss, batch_size)`` tuple: the loss as a Python float and ``len(xb)``.
    """
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        # Reset gradients so the next batch does not accumulate onto these.
        opt.zero_grad()

    return loss.item(), len(xb)

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    """Train ``model`` and print the validation loss after every epoch.

    Args:
        epochs: Number of passes over ``train_dl``.
        model: ``nn.Module`` to train; switched between train and eval mode.
        loss_func: Callable ``loss_func(predictions, targets)``.
        opt: Optimizer used for the training batches.
        train_dl: Iterable of ``(xb, yb)`` training batches.
        valid_dl: Iterable of ``(xb, yb)`` validation batches; it must yield
            at least one batch.

    Returns:
        None. The epoch index and validation loss are printed.
    """
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        # No optimizer is passed here, so validation never updates the model.
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        # Weight each batch loss by its batch size so that a smaller final
        # batch does not skew the average.
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)

4. Actual operation

before refactoring

 # __main()__:
# Report loss and accuracy on the last mini-batch (xb, yb) left over from the
# training loop, using the updated weights.
final_loss = loss_func(model(xb), yb)
final_accuracy = accuracy(model(xb), yb)
print(final_loss, final_accuracy)

after refactoring

 # __main()__:

# data
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/master/_static/"
FILENAME = "mnist.pkl.gz"

# Download the archive only when no local copy exists yet.
if not (PATH / FILENAME).exists():
    response = requests.get(URL + FILENAME)
    # Fail loudly instead of caching an HTTP error page as the dataset.
    response.raise_for_status()
    content = response.content
    # write_bytes() closes the file handle; open(...).write(...) left it open.
    (PATH / FILENAME).write_bytes(content)

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load archives obtained from a trusted source.
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")

x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)

# The listing never defined a batch size.
# NOTE(review): 64 is an assumed value -- confirm against the upstream tutorial.
bs = 64
train_dataset = TensorDataset(x_train, y_train)
valid_dataset = TensorDataset(x_valid, y_valid)
train_dataloader, valid_dataloader = get_data(train_dataset, valid_dataset, bs)
train_dataloader = WrappedDataLoader(train_dataloader, preprocess)
valid_dataloader = WrappedDataLoader(valid_dataloader, preprocess)

# hyperparameters/model
learning_rate = 0.1
epochs = 2
loss_function = F.cross_entropy  # loss function
# The CNN model was removed from this article, so train the logistic model.
# preprocess() reshapes batches to (N, 1, 28, 28); nn.Flatten turns them back
# into the (N, 784) rows that Mnist_Logistic expects.
model = nn.Sequential(nn.Flatten(), Mnist_Logistic())
model.to(dev)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# training
fit(epochs, model, loss_function, optimizer, train_dataloader, valid_dataloader)