34
loading...
This website collects cookies to deliver better user experience
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # for data visualization
%matplotlib inline
import seaborn as sns # for advanced data visualizations
# Load the video-game sales dataset into a DataFrame and take a first look.
vgSales = pd.read_csv("../input/videogamesales/vgsales.csv")
vgSales.head()

# General form of the call above. Kept as a comment because the placeholder
# path does not point at a real file and executing it would raise
# FileNotFoundError:
#     VariableName = pd.read_csv("file_name.csv")

# Column dtypes / non-null counts, then the number of missing values per column.
vgSales.info()
vgSales.isna().sum()
# Inspect the largest value in the Year column.
print("Max Year Value: ", vgSales['Year'].max())

# idxmax() returns an index *label*, so the row has to be fetched with the
# label-based .loc accessor. Positional .iloc only happens to agree with the
# label while the frame still carries its default 0..n-1 index.
maxEntry = vgSales['Year'].idxmax()
vgSales.loc[maxEntry]

# Overwrite the 2020 entries with 2009 and confirm the new maximum.
# NOTE(review): 2009 is presumably the most frequent year in the data - confirm.
vgSales["Year"] = vgSales["Year"].replace(2020.0, 2009.0)
print("Max Year Value: ", vgSales["Year"].max())

# Distinct game names whose Year is missing, and how many there are.
YearAnamoly = vgSales[vgSales['Year'].isnull()]['Name'].unique()
print("The year records having such anomaly: ", len(YearAnamoly))
# Bar chart of how many rows fall in each Year value.
# value_counts() is evaluated once and reused for both the x positions
# (its index) and the bar heights (its values).
year_counts = vgSales["Year"].value_counts()

plt.figure(figsize=(20, 20))
plt.bar(year_counts.index, year_counts)
plt.title("The most games produced in a specific year")
plt.show()
# Fill the missing Year values with 2009, check that none remain, then store
# the column as 64-bit integers.
year_filled = vgSales["Year"].fillna(value=2009.0)
vgSales["Year"] = year_filled
vgSales["Year"].isna().sum()
vgSales["Year"] = vgSales["Year"].astype("int64")
# Print the skewness of each regional sales column, one line per column.
for region_col in ("NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales"):
    skewness = vgSales[region_col].skew()
    print(f"The Skew Count of {region_col} Column is:", skewness)
# Replace each regional sales column with its square root.
for region_col in ("NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales"):
    vgSales[region_col] = vgSales[region_col] ** 0.5

# Rebuild Global_Sales as the sum of the four transformed regional columns.
# NOTE(review): this is a sum of square roots, not the square root of the
# original global total - confirm that is the intended definition.
vgSales["Global_Sales"] = (
    vgSales["NA_Sales"]
    + vgSales["EU_Sales"]
    + vgSales["JP_Sales"]
    + vgSales["Other_Sales"]
)
# Total Global_Sales per Genre (one-column DataFrame indexed by Genre).
vgSales.groupby("Genre")[["Global_Sales"]].sum()
# Bar chart of how many rows belong to each Platform value.
# value_counts() is evaluated once and reused for both the x labels
# (its index) and the bar heights (its values).
platform_counts = vgSales["Platform"].value_counts()

plt.figure(figsize=(20, 20))
plt.bar(platform_counts.index, platform_counts)
plt.title("Most Games produced in Specific Gaming Platform")
plt.show()