当前位置：网站首页>Calculate the Euclidean distance between the row vectors of two matrices

Calculate the Euclidean distance between the row vectors of two matrices

2022-07-18 15:29:00 【RSMung】

1 Problem description

Matrix P has shape [m, d]; its rows are the vectors P1, P2, ..., Pm.

Matrix C has shape [n, d]; its rows are the vectors C1, C2, ..., Cn.

Goal: compute the Euclidean distance between every row vector of matrix P and every row vector of matrix C, giving an [m, n] matrix of distances.

A typical example is applying the KNN algorithm to the clustering of two-dimensional points, where the Euclidean distance between points has to be computed.

2 Solution 1 — two nested loops

Use a two-level loop to compute the Euclidean distance between the i-th row vector of matrix P and the j-th row vector of matrix C.

def l2distanceForMatrix_2loop(a, b):
    """Compute pairwise Euclidean distances with two nested Python loops.

    Entry (i, j) of the result is ``sqrt(sum((a[i] - b[j]) ** 2))``, e.g. for
    2-D points (x1, y1) and (x2, y2): ``sqrt((x1 - x2)^2 + (y1 - y2)^2)``.

    Args:
        a: tensor whose first dimension indexes the samples of the first set.
        b: tensor whose first dimension indexes the samples of the second set.

    Returns:
        A ``torch.zeros``-allocated tensor of shape [a.shape[0], b.shape[0]]
        filled with the distances.

    Side effects:
        Prints the sample counts, the elapsed time and the result.
    """
    started = time.time()
    num_a, num_b = a.shape[0], b.shape[0]
    print(f" matrix a The number of data :{num_a},  matrix b The number of data :{num_b}")
    distance = torch.zeros((num_a, num_b))
    for i in range(num_a):
        row_a = a[i]
        for j in range(num_b):
            # Element-wise difference -> square -> sum -> square root.
            diff = row_a - b[j]
            distance[i, j] = diff.square().sum().sqrt()
    elapsed = time.time() - started
    print(f" Spend time :{elapsed}")
    print(distance)
    return distance

3 Solution 2 — a single loop

For each i, compute the Euclidean distances between the i-th row vector of matrix P and all row vectors of matrix C in one step.

def l2distanceForMatrix_1loop(a, b):
    """Compute pairwise Euclidean distances with a single Python loop.

    For each sample ``a[i]`` the difference against all of ``b`` is formed by
    broadcasting, so one iteration fills a whole row of the result.

    Args:
        a: tensor whose first dimension indexes the samples of the first set.
        b: tensor whose first dimension indexes the samples of the second set.

    Returns:
        A ``torch.zeros``-allocated tensor of shape [a.shape[0], b.shape[0]]
        filled with the distances.

    Side effects:
        Prints the elapsed time and the result.
    """
    started = time.time()
    num_a, num_b = a.shape[0], b.shape[0]
    distance = torch.zeros((num_a, num_b))
    for i in range(num_a):
        # a[i] broadcasts against every row of b.
        squared_diff = torch.square(a[i] - b)
        # Collapse the feature axis, then take the root of each row sum.
        row_sums = torch.sum(squared_diff, dim=1)
        distance[i] = torch.sqrt(row_sums)
    elapsed = time.time() - started
    print(f" Spend time :{elapsed}")
    print(distance)
    return distance

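4 Solution 3 — no loops, using matrix operations

Expanding the squared distance gives $\lVert p - c \rVert^2 = \lVert p \rVert^2 + \lVert c \rVert^2 - 2\,p \cdot c$, so all pairwise distances can be obtained from the row-wise squared norms of P and C plus a single matrix product $P C^{\top}$.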

4.1 Variant 1

def l2distanceForMatrix(a, b):
    """Compute pairwise Euclidean distances between rows using matrix ops.

    Uses the expansion ``|x - y|^2 = |x|^2 + |y|^2 - 2 * x.y`` so the whole
    distance matrix comes from one matrix product instead of Python loops.

    Args:
        a: 2-D tensor of shape [m, d].
        b: 2-D tensor of shape [n, d].

    Returns:
        Tensor of shape [m, n]; entry (i, j) is the distance between
        ``a[i]`` and ``b[j]``.

    Side effects:
        Prints the elapsed time and the result.
    """
    time1 = time.time()
    # Squared norm of every row of a, kept as a column so it broadcasts over n.
    aa = torch.pow(a, 2).sum(dim=1, keepdim=True)   # [m, 1]
    # Squared norm of every row of b, laid out as a row so it broadcasts over m.
    bb = torch.pow(b, 2).sum(dim=1).unsqueeze(0)   # [1, n]

    # Cross term: [m, d] @ [d, n] = [m, n]
    tail = 2 * torch.matmul(a, torch.transpose(b, 0, 1))

    # Floating-point cancellation can leave tiny negative values where the
    # true squared distance is (close to) zero; sqrt of those would be NaN,
    # so clamp at zero before taking the root.
    squared = torch.clamp(aa + bb - tail, min=0)
    distance = torch.sqrt(squared)
    time2 = time.time()
    print(f" Spend time :{time2 - time1}")
    print(distance)
    return distance

4.2 Variant 2

def l2distanceForMatrix2(a, b):
    """Compute pairwise Euclidean distances, taking norms from Gram diagonals.

    Same expansion as ``|x - y|^2 = |x|^2 + |y|^2 - 2 * x.y``, but the squared
    row norms are read off the main diagonals of ``a @ a^T`` and ``b @ b^T``.

    Args:
        a: 2-D tensor of shape [m, d].
        b: 2-D tensor of shape [n, d].

    Returns:
        Tensor of shape [m, n]; entry (i, j) is the distance between
        ``a[i]`` and ``b[j]``.

    Side effects:
        Prints the method name, the elapsed time and the result.
    """
    print(" Method l2distanceForMatrix2")
    time1 = time.time()
    # Gram matrices: [m, m] and [n, n]. Only their diagonals are used below.
    matrix_a = torch.matmul(a, torch.transpose(a, 0, 1))
    matrix_b = torch.matmul(b, torch.transpose(b, 0, 1))

    # The main diagonal holds the squared 2-norm of each row vector.
    aa = torch.diag(matrix_a).unsqueeze(1)   # [m, 1], broadcasts over n
    bb = torch.diag(matrix_b).unsqueeze(0)   # [1, n], broadcasts over m

    # Cross term: [m, d] @ [d, n] = [m, n]
    tail = 2 * torch.matmul(a, torch.transpose(b, 0, 1))

    # Floating-point cancellation can leave tiny negative values where the
    # true squared distance is (close to) zero; sqrt of those would be NaN,
    # so clamp at zero before taking the root.
    squared = torch.clamp(aa + bb - tail, min=0)
    distance = torch.sqrt(squared)
    time2 = time.time()
    print(f" Spend time :{time2 - time1}")
    print(distance)
    return distance

5 Test code

import os
import torch
import torch.nn as nn
import time

def l2distanceForMatrix_2loop(a, b):
    """Compute pairwise Euclidean distances with two nested Python loops.

    Entry (i, j) of the result is ``sqrt(sum((a[i] - b[j]) ** 2))``, e.g. for
    2-D points (x1, y1) and (x2, y2): ``sqrt((x1 - x2)^2 + (y1 - y2)^2)``.

    Args:
        a: tensor whose first dimension indexes the samples of the first set.
        b: tensor whose first dimension indexes the samples of the second set.

    Returns:
        A ``torch.zeros``-allocated tensor of shape [a.shape[0], b.shape[0]]
        filled with the distances.

    Side effects:
        Prints the sample counts, the elapsed time and the result.
    """
    started = time.time()
    num_a, num_b = a.shape[0], b.shape[0]
    print(f" matrix a The number of data :{num_a},  matrix b The number of data :{num_b}")
    distance = torch.zeros((num_a, num_b))
    for i in range(num_a):
        row_a = a[i]
        for j in range(num_b):
            # Element-wise difference -> square -> sum -> square root.
            diff = row_a - b[j]
            distance[i, j] = diff.square().sum().sqrt()
    elapsed = time.time() - started
    print(f" Spend time :{elapsed}")
    print(distance)
    return distance

def l2distanceForMatrix_1loop(a, b):
    """Compute pairwise Euclidean distances with a single Python loop.

    For each sample ``a[i]`` the difference against all of ``b`` is formed by
    broadcasting, so one iteration fills a whole row of the result.

    Args:
        a: tensor whose first dimension indexes the samples of the first set.
        b: tensor whose first dimension indexes the samples of the second set.

    Returns:
        A ``torch.zeros``-allocated tensor of shape [a.shape[0], b.shape[0]]
        filled with the distances.

    Side effects:
        Prints the elapsed time and the result.
    """
    started = time.time()
    num_a, num_b = a.shape[0], b.shape[0]
    distance = torch.zeros((num_a, num_b))
    for i in range(num_a):
        # a[i] broadcasts against every row of b.
        squared_diff = torch.square(a[i] - b)
        # Collapse the feature axis, then take the root of each row sum.
        row_sums = torch.sum(squared_diff, dim=1)
        distance[i] = torch.sqrt(row_sums)
    elapsed = time.time() - started
    print(f" Spend time :{elapsed}")
    print(distance)
    return distance


def l2distanceForMatrix(a, b):
    """Compute pairwise Euclidean distances between rows using matrix ops.

    Uses the expansion ``|x - y|^2 = |x|^2 + |y|^2 - 2 * x.y`` so the whole
    distance matrix comes from one matrix product instead of Python loops.

    Args:
        a: 2-D tensor of shape [m, d].
        b: 2-D tensor of shape [n, d].

    Returns:
        Tensor of shape [m, n]; entry (i, j) is the distance between
        ``a[i]`` and ``b[j]``.

    Side effects:
        Prints the method name, the elapsed time and the result.
    """
    print(" Method l2distanceForMatrix")
    time1 = time.time()
    # Squared norm of every row of a, kept as a column so it broadcasts over n.
    aa = torch.pow(a, 2).sum(dim=1, keepdim=True)   # [m, 1]
    # Squared norm of every row of b, laid out as a row so it broadcasts over m.
    bb = torch.pow(b, 2).sum(dim=1).unsqueeze(0)   # [1, n]

    # Cross term: [m, d] @ [d, n] = [m, n]
    tail = 2 * torch.matmul(a, torch.transpose(b, 0, 1))

    # Floating-point cancellation can leave tiny negative values where the
    # true squared distance is (close to) zero; sqrt of those would be NaN,
    # so clamp at zero before taking the root.
    squared = torch.clamp(aa + bb - tail, min=0)
    distance = torch.sqrt(squared)
    time2 = time.time()
    print(f" Spend time :{time2 - time1}")
    print(distance)
    return distance


def l2distanceForMatrix2(a, b):
    """Compute pairwise Euclidean distances, taking norms from Gram diagonals.

    Same expansion as ``|x - y|^2 = |x|^2 + |y|^2 - 2 * x.y``, but the squared
    row norms are read off the main diagonals of ``a @ a^T`` and ``b @ b^T``.

    Args:
        a: 2-D tensor of shape [m, d].
        b: 2-D tensor of shape [n, d].

    Returns:
        Tensor of shape [m, n]; entry (i, j) is the distance between
        ``a[i]`` and ``b[j]``.

    Side effects:
        Prints the method name, the elapsed time and the result.
    """
    print(" Method l2distanceForMatrix2")
    time1 = time.time()
    # Gram matrices: [m, m] and [n, n]. Only their diagonals are used below.
    matrix_a = torch.matmul(a, torch.transpose(a, 0, 1))
    matrix_b = torch.matmul(b, torch.transpose(b, 0, 1))

    # The main diagonal holds the squared 2-norm of each row vector.
    aa = torch.diag(matrix_a).unsqueeze(1)   # [m, 1], broadcasts over n
    bb = torch.diag(matrix_b).unsqueeze(0)   # [1, n], broadcasts over m

    # Cross term: [m, d] @ [d, n] = [m, n]
    tail = 2 * torch.matmul(a, torch.transpose(b, 0, 1))

    # Floating-point cancellation can leave tiny negative values where the
    # true squared distance is (close to) zero; sqrt of those would be NaN,
    # so clamp at zero before taking the root.
    squared = torch.clamp(aa + bb - tail, min=0)
    distance = torch.sqrt(squared)
    time2 = time.time()
    print(f" Spend time :{time2 - time1}")
    print(distance)
    return distance


def main():
    """Run all four distance implementations on one small fixed example."""
    # For a larger experiment, random inputs such as torch.randn((600, 2))
    # can be substituted for both tensors.
    points_a = torch.tensor(
        [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]],
        dtype=torch.float32,
    )
    points_b = torch.tensor(
        [[3, 4], [5, 6], [7, 8], [9, 10]],
        dtype=torch.float32,
    )
    # Same call order as the article: two loops, one loop, then both
    # matrix-based variants.
    implementations = (
        l2distanceForMatrix_2loop,
        l2distanceForMatrix_1loop,
        l2distanceForMatrix,
        l2distanceForMatrix2,
    )
    for compute in implementations:
        compute(points_a, points_b)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()