import pandas as pd
from math import sqrt, pow
import matplotlib.pyplot as plt
def euclidean_distance(x, y):
    """Return the Euclidean (L2) distance between two equal-length vectors.

    Args:
        x: Iterable of numeric coordinates.
        y: Iterable of numeric coordinates, with as many items as ``x``.

    Returns:
        The square root of the sum of squared coordinate differences, as a
        float (``0.0`` when both inputs are empty).

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of items.
    """
    # Materialize both inputs so their lengths can be compared even when they
    # are generators; zip() alone would silently drop the unmatched tail of
    # the longer input and report a distance for the truncated vectors.
    xs, ys = list(x), list(y)
    if len(xs) != len(ys):
        raise ValueError(
            f"x and y must have the same length, got {len(xs)} and {len(ys)}"
        )
    return sqrt(sum(pow(a - b, 2) for a, b in zip(xs, ys)))
def manhattan_distance(x, y):
    """Return the Manhattan (L1) distance between two equal-length vectors.

    Args:
        x: Iterable of numeric coordinates.
        y: Iterable of numeric coordinates, with as many items as ``x``.

    Returns:
        The sum of the absolute coordinate differences (``0`` when both
        inputs are empty).

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of items.
    """
    # Materialize both inputs so their lengths can be compared even when they
    # are generators; zip() alone would silently drop the unmatched tail of
    # the longer input and report a distance for the truncated vectors.
    xs, ys = list(x), list(y)
    if len(xs) != len(ys):
        raise ValueError(
            f"x and y must have the same length, got {len(xs)} and {len(ys)}"
        )
    return sum(abs(a - b) for a, b in zip(xs, ys))
# Load the dataset; the code below reads its 'Gender', 'Height' and 'Weight'
# columns.
data = pd.read_csv('data.csv')

# Draw Graph
# Scatter Height against Weight for each gender. The first plot() call returns
# the Axes it drew on; passing it as `ax=` to the second call puts both groups
# on the same figure instead of opening a new one.
x = data[data['Gender'] == 'Male'].plot(
    kind='scatter', x='Height', y='Weight', color='blue', figsize=(10, 7)
)
data[data['Gender'] == 'Female'].plot(
    kind='scatter', x='Height', y='Weight', color='pink', figsize=(10, 7), ax=x
)
plt.xlabel('Height')
plt.ylabel('weight')
plt.title('Analyze Height and Weight of men and women')
# Labels are given in the order the two scatter series were drawn.
plt.legend(labels=['Males', 'Females'])
plt.show()

# Retrieving all heights and weights
heights = data['Height']
weights = data['Weight']

# Calculating Euclidean Distance
euclidean_result = euclidean_distance(heights, weights)

# Calculating Manhattan Distance
manhattan_result = manhattan_distance(heights, weights)

# To get value between 0 and 1: a distance d >= 0 is mapped to 1 / (1 + d),
# so d == 0 gives 1 and larger distances approach 0.
euclidean_result = 1 / (1 + euclidean_result)
manhattan_result = 1 / (1 + manhattan_result)

# Checking if the similarity value is nearest to 0 or 1
# (round() sends values up to one half to 0 and larger values to 1).
# Euclidean Distance
if round(euclidean_result) == 0:
    print(f"This is no similarity using Euclidean Distance between heights and weights for this dataset with similarity value = {euclidean_result}")
else:
    print(f"This is similarity using Euclidean Distance between heights and weights for this dataset with similarity value = {euclidean_result}")

print('------------------------------------------------------')

# Manhattan Distance
if round(manhattan_result) == 0:
    print(f"This is no similarity using Manhattan Distance between heights and weights for this dataset with similarity value = {manhattan_result}")
else:
    print(f"This is similarity using Manhattan Distance between heights and weights for this dataset with similarity value = {manhattan_result}")
# Sample output:
# This is no similarity using Euclidean Distance between heights and weights for this dataset with similarity value = 0.00010071759753209523
# ------------------------------------------------------
# This is no similarity using Manhattan Distance between heights and weights for this dataset with similarity value = 1.0518244744130195e-06