20
loading...
This website collects cookies to deliver better user experience
import pandas as pd
import numpy as np
# Read melb_data.csv into a DataFrame.
df = pd.read_csv('melb_data.csv')
# Bare expression: shows the frame when it is the last statement of a notebook
# cell; it has no effect when the file runs as a plain script.
df
# isna() flags every missing cell; summing the flags column-wise gives the
# number of missing values in each column.
df.isna().sum()
# Columns whose missing values are handled below:
# BuildingArea, YearBuilt, CouncilArea
# axis=1 drops every column that contains NaN values, e.g. df.dropna(axis=1).
# axis=0 drops every row that contains NaN values, e.g. df.dropna(axis=0).
# Impute missing numeric values with the column mean, truncated to an integer.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selection df[col]: the in-place form is chained
# assignment, which pandas warns about and which leaves df unchanged once
# Copy-on-Write is enabled.
for column in ('BuildingArea', 'YearBuilt'):
    column_mean = df[column].mean()
    # mean() is NaN when the column has no observed values, and int(NaN)
    # raises ValueError, so such a column is left untouched.
    if pd.notna(column_mean):
        df[column] = df[column].fillna(int(column_mean))
# Print the column dtypes and non-null counts to confirm the fill.
df.info()
# Record which rows lack a CouncilArea *before* imputing it; computed after
# the fill, the indicator would no longer mark the rows that were imputed.
df['CouncilArea_Missing'] = df['CouncilArea'].isnull()

# Fill the categorical column with its most frequent value. mode() returns a
# Series (there can be ties) and fillna(Series) aligns on the index, so passing
# it directly would only fill rows whose labels match the mode's 0..k-1 index.
# Use the first mode as a scalar instead; mode() is empty when the column has
# no observed values, in which case there is nothing to fill with.
council_modes = df['CouncilArea'].mode()
if not council_modes.empty:
    df['CouncilArea'] = df['CouncilArea'].fillna(council_modes.iloc[0])

# Zero-fill any numeric gaps that remain (no effect on a column that has
# already been imputed). Results are assigned back rather than using
# fillna(inplace=True) on a column selection, which is chained assignment.
df['BuildingArea'] = df['BuildingArea'].fillna(0)
# The column label is 'YearBuilt'; the original key had a trailing space and
# raised KeyError.
df['YearBuilt'] = df['YearBuilt'].fillna(0)
from sklearn.impute import SimpleImputer
# Build an imputer configured with the 'mean' strategy. It is only constructed
# here; nothing in this section fits it or applies it to the data.
my_imputer = SimpleImputer(strategy="mean")
# Bare expression: shows the missing-value indicator column when it is the last
# statement of a notebook cell.
df["CouncilArea_Missing"]