import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import sklearn.model_selection
import torch
import torch.nn as nn
from torch.nn.functional import one_hot
from torch.utils.data import DataLoader,Dataset,TensorDataset
# ---------------------------------------------------------------------------
# Data preparation: download the Auto MPG data, clean it, split it,
# engineer the features and wrap the training split in a DataLoader.
# Produces: df, df_train, df_test, df_train_norm, df_test_norm,
#           x_train, x_test, y_train, y_test, train_ds, train_dl
# ---------------------------------------------------------------------------

# Load the dataset
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
# '?' is read as a missing value; anything after a tab on a line is
# ignored because '\t' is declared as the comment character.
df = pd.read_csv(url, names=column_names,
                 na_values="?", comment='\t',
                 sep=" ", skipinitialspace=True)

# Dropping rows with missing values
df = df.dropna().reset_index(drop=True)

# Splitting the data into train and test sets
df_train, df_test = sklearn.model_selection.train_test_split(
    df, train_size=0.8, random_state=1)

# Standardizing continuous features.
# Mean and std come from the training split only and are reused for the
# test split, so the test data never influences the preprocessing.
numeric_column_names = ['Cylinders', 'Displacement', 'Horsepower',
                        'Weight', 'Acceleration']
train_stats = df_train.describe().transpose()

df_train_norm, df_test_norm = df_train.copy(), df_test.copy()
for col_name in numeric_column_names:
    mean = train_stats.loc[col_name, 'mean']
    std = train_stats.loc[col_name, 'std']
    df_train_norm[col_name] = (df_train_norm[col_name] - mean) / std
    df_test_norm[col_name] = (df_test_norm[col_name] - mean) / std

# Bucketing the model year categories.
# With right=True a value v lands in bucket i where
# boundaries[i-1] <= v < boundaries[i], i.e.
# <73 -> 0, 73..75 -> 1, 76..78 -> 2, >=79 -> 3.
boundaries = torch.tensor([73, 76, 79])

v = torch.tensor(df_train_norm['Model Year'].values)
# .numpy() hands pandas a plain array, so the column is assigned by
# position and gets an integer dtype rather than holding tensor objects.
df_train_norm['Model Year Bucketed'] = torch.bucketize(
    v, boundaries, right=True).numpy()

v = torch.tensor(df_test_norm['Model Year'].values)
df_test_norm['Model Year Bucketed'] = torch.bucketize(
    v, boundaries, right=True).numpy()

numeric_column_names.append('Model Year Bucketed')

# One-hot encoding the origin feature.
# The modulo maps the origin codes into [0, total_origin); passing
# num_classes explicitly keeps the encoded width identical for the train
# and test splits even if one split is missing the highest class.
total_origin = len(set(df_train_norm['Origin']))

origin_encoded = one_hot(
    torch.from_numpy(df_train_norm['Origin'].values) % total_origin,
    num_classes=total_origin)

# Creating the train and test feature and label tensors
x_train_numeric = torch.tensor(df_train_norm[numeric_column_names].values)
x_train = torch.cat([x_train_numeric, origin_encoded], 1).float()

origin_encoded = one_hot(
    torch.from_numpy(df_test_norm['Origin'].values) % total_origin,
    num_classes=total_origin)
x_test_numeric = torch.tensor(df_test_norm[numeric_column_names].values)
x_test = torch.cat([x_test_numeric, origin_encoded], 1).float()

y_train = torch.tensor(df_train_norm['MPG'].values).float()
y_test = torch.tensor(df_test_norm['MPG'].values).float()

# Creating a data loader to load the train dataset in batches
train_ds = TensorDataset(x_train, y_train)
batch_size = 8
# Seed before building the loader so the shuffling order is reproducible.
torch.manual_seed(1)
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
Introduction
In this article, we’ll dive into the world of deep learning with PyTorch by constructing a neural-network regression model that predicts a vehicle’s miles per gallon (MPG) from multiple input features. We’ll explore the preprocessing steps, model architecture, training process, and evaluation of the model’s performance.
Preparing the Data and Data Preprocessing
Our journey begins by loading the auto MPG dataset, which contains information about vehicle characteristics and their corresponding MPG values. We’ll focus on features like the number of cylinders, displacement, horsepower, weight, acceleration, manufacturing origin, and model year.
To ensure our data is suitable for training, we perform necessary preprocessing steps. We drop rows with missing values, standardize continuous features, and transform categorical features into one-hot encoded vectors.
Building the DNN Regression Model
With our data prepared, we move on to constructing our Deep Neural Network (DNN) regression model using PyTorch. This model will predict MPG values based on the vehicle’s features.
# Define the model architecture:
# a stack of Linear + ReLU hidden layers followed by a single-output
# Linear layer that produces the regression prediction.
hidden_units = [8, 4]
input_size = x_train.shape[1]  # number of feature columns

all_layers = []
for hidden_unit in hidden_units:
    layer = nn.Linear(input_size, hidden_unit)
    all_layers.append(layer)
    all_layers.append(nn.ReLU())
    # The next layer consumes this layer's output width.
    input_size = hidden_unit

# Output layer: one value per sample.
all_layers.append(nn.Linear(hidden_units[-1], 1))

model = nn.Sequential(*all_layers)

# Define the loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
Training the Model
It’s time to train our DNN regression model on the training data. We iterate through the data for a specified number of epochs, adjusting the model’s weights to minimize the mean squared error loss.
# Train with mini-batch SGD, printing the mean per-batch training loss
# every `log_epochs` epochs.
num_epochs = 200
log_epochs = 20

for epoch in range(num_epochs):
    loss_hist_train = 0  # running sum of batch losses for this epoch
    for x_batch, y_batch in train_dl:
        # The model output has a trailing dimension of size 1; take
        # column 0 so predictions line up with the 1-D targets.
        pred = model(x_batch)[:, 0]
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        # Clear gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()
        loss_hist_train += loss.item()
    if epoch % log_epochs == 0:
        print(f'Epoch {epoch} Loss {loss_hist_train/len(train_dl):.4f}')
Epoch 0 Loss 530.7308
Epoch 20 Loss 7.8103
Epoch 40 Loss 7.7546
Epoch 60 Loss 6.9081
Epoch 80 Loss 6.8482
Epoch 100 Loss 6.7144
Epoch 120 Loss 6.4509
Epoch 140 Loss 7.1134
Epoch 160 Loss 6.4428
Epoch 180 Loss 6.2078
Evaluating the Model
Once the model is trained, we assess its performance on the test dataset. This helps us understand how well the model generalizes to new, unseen data.
# Evaluate on the held-out test split. Gradient tracking is disabled
# because no parameters are updated here.
with torch.no_grad():
    pred = model(x_test.float())[:, 0]
    loss = loss_fn(pred, y_test)         # mean squared error
    mae = nn.L1Loss()(pred, y_test)      # mean absolute error
    print(f'Test MSE: {loss.item():.4f}')
    print(f'Test MAE: {mae.item():.4f}')
Test MSE: 13.1923
Test MAE: 2.6507
With these encouraging metrics in hand, let’s confirm them visually by plotting the predicted values against the actual ones.
# Plotting actual vs. predicted MPG values.
# Points on the dashed diagonal are predictions that match the target
# exactly; the further a point is from the line, the larger the error.
plt.figure(figsize=(10, 6))
plt.scatter(y_test, pred, color='blue', label='Predicted')
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()],
         color='red', linewidth=2, linestyle='--',
         label='Perfect Prediction')
plt.xlabel('Actual MPG')
plt.ylabel('Predicted MPG')
plt.title('Actual vs. Predicted MPG Values')
plt.legend()
plt.show()
Conclusion
Our DNN regression model demonstrates promising results in predicting MPG values based on vehicle features. By carefully preprocessing the data, constructing an appropriate model architecture, and iteratively training the model, we achieve a model that generalizes reasonably well to new data. This article serves as a starting point for your journey into deep learning with PyTorch, enabling you to build more advanced models and tackle a variety of data analysis challenges.