# 23
# loading...
# This website collects cookies to deliver better user experience
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
# Open every figure in the system web browser.
pio.renderers.default = 'browser'

# Synthetic salaries: exponential noise (scale 20) on top of a 50000 floor.
x = 50000 + np.random.exponential(scale=20, size=100000)

df = pd.DataFrame({'monthly_salary': x})
# Random 1/0 default flag, one draw per client.
df['default'] = np.random.choice([1, 0], size=len(x))
# Constant 1 on every row: summing this column counts the clients
# that fall into each bin group.
df['help_column'] = 1

# Overall distribution of the simulated salaries.
fig = px.histogram(df, x='monthly_salary', nbins=200)
fig.show()
# Unit-wide intervals spanning 50000..50100; salaries outside that span
# are not assigned to any bin (they become NaN).
bins_ = pd.interval_range(start=50000, end=50100, freq=1)
df['monthly_salary_BINS'] = pd.cut(x=df['monthly_salary'], bins=bins_)

# Label each bin by its left edge only: a single number is easier to
# put on a plot axis than a full (lower, upper] interval.
df['monthly_salary_BINS_left'] = df['monthly_salary_BINS'].apply(
    lambda interval: interval.left
)

# Number of clients per bin, obtained by summing the helper column.
xx = (
    df.groupby('monthly_salary_BINS_left')[['help_column']]
    .sum()
    .reset_index()
)

fig = px.bar(x=xx['monthly_salary_BINS_left'], y=xx['help_column'])
fig.show()
# Salary histogram split by the `default` flag, with the two groups drawn
# side by side (barmode='group') rather than stacked.
# NOTE(review): this call previously passed `df_filtered`, a name that is
# never defined in this script, so it raised NameError. It now plots `df`,
# the only DataFrame built here. If a filtered subset was intended, define
# it explicitly before this call and pass it instead.
fig = px.histogram(
    data_frame=df,
    x='monthly_salary',
    color='default',
    nbins=200,
    barmode='group',
)
fig.show()
# Client counts per salary bin (rows, keyed by the bin's left edge) and
# per `default` value (columns), obtained by summing the helper column.
df_pivoted = df.pivot_table(
    values='help_column',
    index='monthly_salary_BINS_left',
    columns='default',
    aggfunc='sum',
)

# One bar per `default` column, placed side by side within each bin.
fig = px.bar(data_frame=df_pivoted, barmode='group')
fig.show()