23
loading...
This website collects cookies to deliver better user experience
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
matplotlib: used to plot the data in a graphical manner
pandas: used for working with the dataset
sklearn: used to split the dataset and then apply the linear regression class onto the data.
dataset = pd.read_csv("Salary_Data.csv")
# Separate the table into model inputs and target:
#   X - every column except the last one (the independent variable(s)),
#       kept two-dimensional because it is a column slice
#   y - the last column only (the dependent variable)
# .to_numpy() is the accessor pandas recommends over .values for obtaining
# the underlying NumPy array.
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()
# Hold out 20% of the rows for testing; random_state=0 pins the shuffle so
# the same split is produced on every run.
#   X_train / y_train - independent / dependent variables used for fitting
#   X_test  / y_test  - the held-out counterparts
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
The split above uses the train_test_split function that we imported from sklearn.model_selection.
regressor = LinearRegression()
# Train the linear model on the training portion of the data.
regressor.fit(X=X_train, y=y_train)
The model is trained with the fit() method of the LinearRegression class.
plt.scatter(X_train, y_train, color= "red")
# Draw the fitted regression line by predicting on the training inputs
plt.plot(X_train, regressor.predict(X_train), color="blue")
# Label both axes, then title the figure
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.title("Salary vs Experience(train)")
# Render the training-set figure
plt.show()
# Scatter the held-out test observations
plt.scatter(X_test, y_test, color="red")
# The line is intentionally still drawn from X_train: it shows the model
# fitted on the training data, so we can see how close the test points
# fall to it. (Presumably X holds a single feature, in which case the
# fitted line is the same straight line whichever inputs it is drawn from.)
plt.plot(X_train, regressor.predict(X_train), color="blue")
# Label both axes, then title the figure
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.title("Salary vs Experience (test)")
# Render the test-set figure
plt.show()