Once you’ve defined your model, the next step is training. In PyTorch, training means preparing the data, choosing a loss function and optimizer, and writing the training loop that runs the forward and backward passes and updates the weights.
Start by building a small synthetic dataset and wrapping it in a DataLoader; the model that consumes it is a one-layer nn.Module defined just after.
import torch
from torch.utils.data import DataLoader, TensorDataset
# Dummy data
X = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
y = torch.tensor([[2.0], [4.0], [6.0], [8.0]])
# Wrap in DataLoader
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=2, shuffle=True)
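As a quick sanity check (not required for training, just an aside), you can pull a single batch from the loader and confirm its shape; with batch_size=2 and one feature per sample, both tensors come out as [2, 1]:

# Peek at one batch; shuffle=True means the rows vary run to run
batch_X, batch_y = next(iter(loader))
print(batch_X.shape, batch_y.shape)  # torch.Size([2, 1]) torch.Size([2, 1])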
import torch.nn as nn

class LinearModel(nn.Module):
    def __init__(self):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        return self.linear(x)
model = LinearModel()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
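Both pieces are interchangeable components. Purely as an aside (these lines are not part of the running example), a mean-absolute-error criterion or the Adam optimizer would drop in with no other changes to the loop:

# Alternative choices; either line can replace its counterpart above
criterion = nn.L1Loss()                                   # mean absolute error
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)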
epochs = 100
for epoch in range(epochs):
    for batch_X, batch_y in loader:
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")
# Save model
torch.save(model.state_dict(), 'linear_model.pth')
# Load model
model2 = LinearModel()
model2.load_state_dict(torch.load('linear_model.pth'))
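state_dict() captures the weights only. To resume training later, you typically also want the optimizer state and the epoch counter; a common pattern is a dict checkpoint (the file name 'checkpoint.pth' here is illustrative):

# Save a fuller checkpoint for resuming training
torch.save({
    'epoch': epochs,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, 'checkpoint.pth')

# Restore both the model and a fresh optimizer bound to it
ckpt = torch.load('checkpoint.pth')
model2.load_state_dict(ckpt['model_state_dict'])
optimizer2 = torch.optim.SGD(model2.parameters(), lr=0.01)
optimizer2.load_state_dict(ckpt['optimizer_state_dict'])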
A single training loop tells you nothing about generalization. The next example makes the setup more realistic by splitting the dataset with random_split and adding a validation phase after each training epoch.

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split
X = torch.tensor([[1.0], [2.0], [3.0], [4.0], [5.0]])
y = torch.tensor([[2.0], [4.0], [6.0], [8.0], [10.0]])
dataset = TensorDataset(X, y)
train_set, val_set = random_split(dataset, [4, 1])
train_loader = DataLoader(train_set, batch_size=2, shuffle=True)
val_loader = DataLoader(val_set, batch_size=1)
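If you need the split to be reproducible across runs, random_split accepts an explicit generator (the seed value below is arbitrary):

# Reproducible variant: a seeded generator makes random_split deterministic
gen = torch.Generator().manual_seed(42)
train_set, val_set = random_split(dataset, [4, 1], generator=gen)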
class LinearModel(nn.Module):
    def __init__(self):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        return self.linear(x)
model = LinearModel()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
epochs = 50
for epoch in range(epochs):
    # Training phase
    model.train()
    for batch_X, batch_y in train_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation phase
    model.eval()
    with torch.no_grad():
        for val_X, val_y in val_loader:
            val_output = model(val_X)
            val_loss = criterion(val_output, val_y)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")
The same loop handles multiple input features without modification. In this last example the targets follow y = x1 + x2 + 2, so the model takes two input features:

X = torch.tensor([
    [1.0, 2.0],
    [2.0, 3.0],
    [3.0, 4.0],
    [4.0, 5.0]
])
y = torch.tensor([[5.0], [7.0], [9.0], [11.0]])

dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=2, shuffle=True)
class TwoFeatureLinear(nn.Module):
    def __init__(self):
        super(TwoFeatureLinear, self).__init__()
        self.linear = nn.Linear(2, 1)

    def forward(self, x):
        return self.linear(x)
model = TwoFeatureLinear()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
for epoch in range(30):
    for batch_X, batch_y in loader:
        pred = model(batch_X)
        loss = criterion(pred, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
print("Final Weights:", model.linear.weight)
print("Final Bias:", model.linear.bias)
# Save the model weights only
torch.save(model.state_dict(), 'model_weights.pth')
# To load later
loaded_model = TwoFeatureLinear()
loaded_model.load_state_dict(torch.load('model_weights.pth'))
loaded_model.eval()
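Finally, inference with the reloaded model: wrap the call in torch.no_grad(), since gradients aren't needed at prediction time. The input below is made up for illustration; if training converged, the targets follow y = x1 + x2 + 2, so the prediction should land near 13:

# Predict on unseen input (illustrative values)
with torch.no_grad():
    new_X = torch.tensor([[5.0, 6.0]])
    print(loaded_model(new_X))  # ideally close to 5 + 6 + 2 = 13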
With PyTorch, training a model involves a clear and flexible loop for computing predictions, evaluating loss, and updating weights. DataLoader efficiently handles batching and shuffling, making training both simple and scalable.