22
loading...
This website collects cookies to deliver better user experience
This article presumes that you know some machine learning principles and have familiarity with Python and its data science libraries.
# to Install Geopy Library
pip install geopy
conda install -c conda-forge geopy
# to Install Folium Library
pip install folium
conda install -c conda-forge folium
`train` that, among other features, includes `Pickup Long` and `Pickup Lat`. This will be the dataset used for testing.
import matplotlib.pyplot as plt
import seaborn as sns
# Scatter plot of the raw pickup coordinates (latitude on x, longitude on y).
plt.figure(figsize=(15, 8))
# Column names must use plain ASCII quotes (typographic quotes are a
# SyntaxError), and seaborn >= 0.12 requires x/y as keyword arguments.
sns.scatterplot(x=train['Pickup Lat'], y=train['Pickup Long'])
import folium
# Create the map at the given [latitude, longitude] location.
map_pickup = folium.Map(location=[-1.317755, 36.830370])

# Add one circle marker per (latitude, longitude) pickup point to the map.
for point_lat, point_long in zip(train["Pickup Lat"], train["Pickup Long"]):
    marker = folium.CircleMarker(location=[point_lat, point_long])
    marker.add_to(map_pickup)

# Display the map: in a Jupyter notebook, just ask for the object representation.
map_pickup

# Optional: save the map to an HTML file.
map_pickup.save('map_pickup.html')
# Keep four decimal places for both pickup coordinate columns.
train = train.round(dict.fromkeys(["Pickup Lat", "Pickup Long"], 4))
`radians()`.
import numpy as np
# Convert both pickup coordinate columns to radians, overwriting the originals.
# NOTE(review): code that calls np.radians on these columns itself (such as a
# haversine helper taking degrees) must not be fed the converted values.
train[["Pickup Lat", "Pickup Long"]] = np.radians(train[["Pickup Lat", "Pickup Long"]])
from sklearn.cluster import KMeans ,AgglomerativeClustering
# NOTE: the two snippets below are alternatives. Both write their labels to the
# same 'pickup cluster' column, so running them back to back keeps only the
# k-means labels.

# creates 5 clusters using hierarchical clustering.
# Ward linkage only works with Euclidean distance, which is already the
# default, so no distance argument is passed. The former `affinity` keyword was
# deprecated in scikit-learn 1.2 and removed in 1.4.
agc = AgglomerativeClustering(n_clusters=5, linkage='ward')
train['pickup cluster'] = agc.fit_predict(train[['Pickup Lat', 'Pickup Long']])

# creates 5 clusters using k-means clustering algorithm.
kmeans = KMeans(n_clusters=5)
# fit_predict is equivalent to fit followed by predict on the same data, so the
# labels are reused instead of running a second prediction pass.
clusters = kmeans.fit_predict(train[['Pickup Lat', 'Pickup Long']])
train['pickup cluster'] = clusters
from geopy.geocoders import Nominatim
# Create the locator.
geolocator = Nominatim(user_agent="myGeocoder")

# Reverse-geocode a "latitude, longitude" string to get the location address.
location = geolocator.reverse("52.509669, 13.376294")
print(location)
# >>> result : Backwerk, Potsdamer Platz, Tiergarten, Mitte, Berlin, 10785, Deutschland

# Get address components such as street, city, state, country, country code,
# postal code and so on from the raw response.
address = location.raw.get('address')
address.get('state')
address.get('city_district')
address.get('country')
address.get('postcode')
import numpy as np
def haversine_distance(row):
    """Return the haversine (great-circle) distance in kilometres.

    The distance is measured between the pickup point and the destination
    point stored in *row*.

    Args:
        row: Mapping-like object (for example a DataFrame row) providing the
            keys 'Pickup Lat', 'Pickup Long', 'Destination Lat' and
            'Destination Long'. The values must be in degrees, because they
            are converted with ``np.radians`` here. Only key lookups and NumPy
            ufuncs are used, so whole columns work as well as scalars.

    Returns:
        The distance in kilometres, using an Earth radius of 6371 km.
    """
    radius = 6371  # km

    lat_p, lon_p = row['Pickup Lat'], row['Pickup Long']
    lat_d, lon_d = row['Destination Lat'], row['Destination Long']

    dlat = np.radians(lat_d - lat_p)
    dlon = np.radians(lon_d - lon_p)

    a = (
        np.sin(dlat / 2) ** 2
        + np.cos(np.radians(lat_p)) * np.cos(np.radians(lat_d)) * np.sin(dlon / 2) ** 2
    )
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make sqrt(1 - a) NaN; cap it at 1.
    a = np.minimum(a, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return radius * c
# haversine_distance only does column lookups and NumPy ufunc arithmetic, so
# the whole frame can be passed in one vectorized call instead of a slow
# row-by-row DataFrame.apply. Coordinates must be in degrees here.
train['distance'] = haversine_distance(train)
x = cos(lat) * cos(lon)
y = cos(lat) * sin(lon)
z = sin(lat)
import numpy as np
# Map the pickup latitude/longitude to x, y, z coordinates:
#   x = cos(lat) * cos(lon), y = cos(lat) * sin(lon), z = sin(lat)
# NOTE(review): np.cos / np.sin take radians, so this assumes the columns have
# already been converted from degrees — confirm before running.
lat_col, lon_col = train['Pickup Lat'], train['Pickup Long']
cos_lat = np.cos(lat_col)
train['pickup x'] = cos_lat * np.cos(lon_col)
train['pickup y'] = cos_lat * np.sin(lon_col)
train['pickup z'] = np.sin(lat_col)
Finally, the methods described above may not all be helpful to your model; their usefulness depends on your problem. Treat them as ideas that you can experiment with to see what fits your problem best.