Facial keypoint detection with a CNN
2022-07-19 07:54:00 【The soul is on the way】
Data link: https://pan.baidu.com/s/1q1GL16Jix7ErkDBghAjrqg?pwd=o48x (extraction code: o48x)
Introduction
Dataset features:
Each predicted keypoint is specified by a real-valued (x, y) pair. There are 15 keypoints, representing the following elements of the face:
left_eye_center, right_eye_center, left_eye_inner_corner, left_eye_outer_corner, right_eye_inner_corner, right_eye_outer_corner, left_eyebrow_inner_end, left_eyebrow_outer_end, right_eyebrow_inner_end, right_eyebrow_outer_end, nose_tip, mouth_left_corner, mouth_right_corner, mouth_center_top_lip, mouth_center_bottom_lip
Left and right here refer to the point of view of the subject.
In some examples, some of the target keypoint positions are missing (encoded as missing entries in the csv, i.e. nothing between two commas).
The input image is given in the last field of the data file as a list of pixels (ordered by row), with integer values in (0, 255). The images are 96x96 pixels.
Data files:
training.csv: a list of 7049 training images. Each row contains the (x, y) coordinates of the 15 keypoints, plus the image data as a row-ordered list of pixels.
test.csv: a list of 1783 test images. Each row contains an ImageId and the image data as a row-ordered list of pixels.
submitFileFormat.csv: a list of 27124 keypoints to predict. Each row contains a RowId, ImageId, FeatureName, and Location. FeatureName is e.g. "left_eye_center_x" or "right_eyebrow_outer_end_y". Location is what you need to predict.
Evaluation metric: RMSE
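As a reference for the metric, a minimal RMSE sketch (not part of the original code, and assuming y_true and y_pred are NumPy arrays of shape (n_samples, 30)) could look like this:
import numpy as np

def rmse(y_true, y_pred):
    # root mean squared error over every predicted keypoint coordinate
    return np.sqrt(np.mean((y_true - y_pred) ** 2))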
Code
Processing data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn  # basic neural network modules
import torch.nn.functional as F  # functional ops (activations, pooling)
import torch.optim as optim  # optimizers
import torch.utils.data as data_utils
from torch.utils.data.sampler import SubsetRandomSampler  # random sampling for the train/validation split
# Unzip the downloaded archives (you can also unzip them manually)
!unzip ../input/facial-keypoints-detection/test.zip
!unzip ../input/facial-keypoints-detection/training.zip
train_data = pd.read_csv("./training.csv")
test_data = pd.read_csv("./test.csv")
# Transpose for easier viewing.
train_data.head().T

# Check for missing values
train_data.isna().sum()

# Forward-fill: replace each missing value with the previous non-missing value in the column
train_data.fillna(method='ffill', inplace=True)
# The last column ('Image') holds the raw pixels of the original image.
# Extract it here so the images can be displayed later.
img_dt = []
for i in range(len(train_data)):
    img_dt.append(train_data['Image'][i].split(' '))
X = np.array(img_dt, dtype='float')
# Display one of the images
plt.imshow(X[1].reshape(96,96), cmap='gray')

# The remaining columns are the keypoint coordinates (the regression targets)
facial_pts_data = train_data.drop(['Image'], axis=1)
facial_pts = []
for i in range(len(facial_pts_data)):
    facial_pts.append(facial_pts_data.iloc[i])
y = np.array(facial_pts, dtype='float')
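A quick sanity check of the processed arrays (a small sketch, not in the original code): X should be (7049, 9216) flattened 96x96 images and y should be (7049, 30) keypoint coordinates. Scaling the pixels to [0, 1] would be an optional extra step the code below does not rely on:
print(X.shape, y.shape)  # expected: (7049, 9216) (7049, 30)
# Optional: X = X / 255.0  # scale pixel values to [0, 1]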
Build the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=5)     # (b,1,96,96) -> (b,4,92,92)
        self.conv1_bn = nn.BatchNorm2d(4)  # batch normalization
        self.conv2 = nn.Conv2d(in_channels=4, out_channels=64, kernel_size=3)    # (b,4,46,46) -> (b,64,44,44)
        self.conv2_bn = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)  # (b,64,22,22) -> (b,128,20,20)
        self.conv3_bn = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3) # (b,128,10,10) -> (b,256,8,8)
        self.conv4_bn = nn.BatchNorm2d(256)
        self.fc1 = nn.Linear(256*4*4, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 30)
        self.dp1 = nn.Dropout(p=0.4)

    def forward(self, x, verbose=False):
        # Four conv blocks: conv -> batch norm -> ReLU -> max pool -> dropout
        x = self.conv1_bn(self.conv1(x))
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x = self.dp1(x)
        x = self.conv2_bn(self.conv2(x))
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x = self.dp1(x)
        x = self.conv3_bn(self.conv3(x))
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        x = self.dp1(x)
        x = self.conv4_bn(self.conv4(x))
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2)
        # Dropout randomly zeroes activations to reduce overfitting;
        # it is disabled after the last conv block here.
        #x = self.dp1(x)
        x = x.view(-1, 256*4*4)
        # Fully connected layers.
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dp1(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.dp1(x)
        x = self.fc3(x)
        return x
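As a quick check (a small sketch, not part of the original code), a dummy forward pass confirms that a 96x96 input comes out as 30 values per sample, i.e. 15 (x, y) pairs:
dummy = torch.randn(2, 1, 96, 96)  # a fake batch of two grayscale 96x96 images
print(CNN()(dummy).shape)          # expected: torch.Size([2, 30])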
Training and testing
def train_test_split(X, validation_split):
    # Split the sample indices into training and validation sets and wrap them in DataLoaders.
    # Note: the targets y are taken from the enclosing scope.
    dataset_size = len(X)
    indices = list(range(dataset_size))
    val_num = int(np.floor(validation_split*dataset_size))
    np.random.shuffle(indices)
    train_indices, val_indices = indices[val_num:], indices[:val_num]
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    loader_object = data_utils.TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).float())
    train_loader = data_utils.DataLoader(loader_object, batch_size=32, sampler=train_sampler)
    valid_loader = data_utils.DataLoader(loader_object, batch_size=32, sampler=valid_sampler)
    return train_loader, valid_loader
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def training(epochs, model, criterion, device, train_loader, valid_loader, optimizer):
    train_error_list = []
    val_error_list = []
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            data = data.view(-1, 1, 96, 96)  # reshape flattened pixels to (batch, 1, 96, 96)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
        train_loss /= len(train_loader.dataset)
        eval_loss = testing(model, device, valid_loader)
        train_error_list.append(train_loss)
        val_error_list.append(eval_loss)
        if (epoch+1) % 25 == 0:
            print("End of epoch {}: \nTraining error = [{}]\tValidation error = [{}]".format(epoch+1, train_loss, eval_loss))
    return train_error_list, val_error_list
def testing(model, device, valid_loader):
    # Evaluate on the validation set; criterion is the MSE loss defined at module level below.
    model.eval()
    test_loss = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for data, target in valid_loader:
            data, target = data.to(device), target.to(device)
            data = data.view(-1, 1, 96, 96)
            output = model(data)
            loss = criterion(output, target)
            test_loss += loss.item()
    test_loss /= len(valid_loader.dataset)
    return test_loss
n_hidden = 128
output_size = 30
train_loader, valid_loader = train_test_split(X, 0.2)
model = CNN()
model.to(device)
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters())
print('Number of parameters: {}'.format(sum(p.numel() for p in model.parameters())))
train_error_list, valid_error_list = training(500, model, criterion, device, train_loader, valid_loader, optimizer)
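The returned loss lists can be plotted to check convergence; a minimal sketch:
plt.plot(train_error_list, label='train')
plt.plot(valid_error_list, label='validation')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()
plt.show()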
Visualize the results
def plot_samples(X, y, model, num_samples):
    fig, axes = plt.subplots(nrows=num_samples, ncols=2, figsize=(10,20))
    for row in range(num_samples):
        sample_idx = np.random.choice(len(X))
        x = X[sample_idx]
        x = torch.from_numpy(x).float().view(1,1,96,96).to(device)
        actual_y = y[sample_idx]
        pred_y = model(x)
        img = X[sample_idx].reshape(96,96)
        actual_y = np.vstack(np.split(actual_y, 15)).T
        pred_y = pred_y.cpu().data.numpy()[0]
        pred_y = np.vstack(np.split(pred_y, 15)).T
        axes[row, 0].imshow(img, cmap='gray')
        axes[row, 0].plot(actual_y[0], actual_y[1], 'o', color='red', label='actual')
        axes[row, 0].legend()
        axes[row, 1].imshow(img, cmap='gray')
        axes[row, 1].plot(actual_y[0], actual_y[1], 'o', color='red', label='actual')
        axes[row, 1].plot(pred_y[0], pred_y[1], 'o', color='green', label='predicted')
        axes[row, 1].legend()

plot_samples(X, y, model, 5)

Prediction on the test set
img_dt = []
for i in range(len(test_data)):
    img_dt.append(test_data['Image'][i].split(' '))
test_X = np.array(img_dt, dtype='float')
test_X_torch = torch.from_numpy(test_X).float().view(len(test_X),1,96,96).to(device)
test_predictions = model(test_X_torch)
test_predictions = test_predictions.cpu().data.numpy()
keypts_labels = train_data.columns.tolist()

def plot_samples_test(X, y, num_samples):
    fig, axes = plt.subplots(nrows=1, ncols=num_samples, figsize=(20,12))
    for row in range(num_samples):
        sample_idx = np.random.choice(len(X))
        img = X[sample_idx].reshape(96,96)
        predicted = y[sample_idx]
        predicted = np.vstack(np.split(predicted, 15)).T
        # print(img, predicted)
        axes[row].imshow(img, cmap='gray')
        axes[row].plot(predicted[0], predicted[1], 'o', color='green', label='predicted')
        axes[row].legend()

plot_samples_test(test_X, test_predictions, 6)

This green looks a little scary.
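To produce a submission in the format described in the introduction, each (RowId, ImageId, FeatureName) row of submitFileFormat.csv needs its predicted coordinate filled into Location. The following is only a hypothetical sketch: it assumes submitFileFormat.csv sits next to the other data files, that its FeatureName values match the training.csv column names, that ImageId is 1-based, and that the 30 model outputs follow the column order of training.csv (which is how y was built above).
# Hypothetical sketch: fill the Location column with the model's predictions
feature_names = [c for c in train_data.columns if c != 'Image']  # 30 names, same order as the model output
submit = pd.read_csv("../input/facial-keypoints-detection/submitFileFormat.csv")  # assumed path
locations = []
for _, r in submit.iterrows():
    img_idx = int(r['ImageId']) - 1                       # ImageId assumed to be 1-based
    feat_idx = feature_names.index(r['FeatureName'])
    locations.append(np.clip(test_predictions[img_idx, feat_idx], 0, 96))  # keep coordinates inside the image
submit['Location'] = locations
submit[['RowId', 'Location']].to_csv('submission.csv', index=False)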