Time to talk Calculus.
Property: At any given point, the gradient vector points in the direction of the greatest increase of the function.
Since we want to move in the direction of greatest decrease, we follow the negative of the gradient vector.
The gradient vector is always perpendicular to the contour lines of the graph of a function (we'll be dealing with contour graphs later).
Note: θ0 represents the bias term.
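Putting these properties together gives the gradient descent update rule, which is exactly what the code below implements (α is the learning rate):

θⱼ := θⱼ − α · ∂J(θ)/∂θⱼ   (for every j, updated simultaneously)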
Note: All the code files can be found on GitHub through this link.
It's highly recommended to follow the notebook along with this section for better understanding. Below is the full implementation:
import numpy as np


class LinearRegression:

    def __init__(self) -> None:
        self.X = None
        self.Y = None
        self.parameters = None
        self.cost_history = []
        self.mu = None
        self.sigma = None

    def calculate_cost(self):
        """
        Returns the cost and gradients.
        parameters: None
        Returns:
            cost : calculated loss (scalar).
            gradients : array containing the gradients w.r.t. each parameter
        """
        m = self.X.shape[0]
        y_hat = np.dot(self.X, self.parameters)
        y_hat = y_hat.reshape(-1)
        error = y_hat - self.Y
        cost = np.dot(error.T, error) / (2 * m)  # vectorized form of (1/2m) * sum of squared errors
        gradients = np.zeros(self.X.shape[1])
        for i in range(self.X.shape[1]):
            gradients[i] = np.mean(error * self.X[:, i])
        return cost, gradients

    def init_parameters(self):
        """
        Initialize the parameters as an array of 0s.
        parameters: None
        Returns: None
        """
        self.parameters = np.zeros((self.X.shape[1], 1))

    def feature_normalize(self, X):
        """
        Normalize the samples.
        parameters:
            X : input/feature matrix
        Returns:
            X_norm : normalized X.
        """
        X_norm = X.copy()
        mu = np.mean(X, axis=0)
        sigma = np.std(X, axis=0)
        self.mu = mu
        self.sigma = sigma
        for n in range(X.shape[1]):
            X_norm[:, n] = (X_norm[:, n] - mu[n]) / sigma[n]
        return X_norm

    def fit(self, x, y, learning_rate=0.01, epochs=500, is_normalize=True, verbose=0):
        """
        Iterates and finds the optimal parameters for the input dataset.
        parameters:
            x : input/feature matrix
            y : target matrix
            learning_rate : between 0 and 1 (default is 0.01)
            epochs : number of iterations (default is 500)
            is_normalize : boolean, for normalizing features (default is True)
            verbose : print the cost every `verbose` epochs (0 disables printing)
        Returns:
            parameters : array of optimal values of the weights.
        """
        self.X = x
        self.Y = y
        self.cost_history = []
        if self.X.ndim == 1:  # adding an extra dimension, if X is a 1-D array
            self.X = self.X.reshape(-1, 1)
            is_normalize = False
        if is_normalize:
            self.X = self.feature_normalize(self.X)
        # add a column of 1s for the bias term (after normalization)
        self.X = np.concatenate([np.ones((self.X.shape[0], 1)), self.X], axis=1)
        self.init_parameters()
        for i in range(epochs):
            cost, gradients = self.calculate_cost()
            self.cost_history.append(cost)
            # simultaneous update of all parameters
            self.parameters -= learning_rate * gradients.reshape(-1, 1)
            if verbose:
                if not (i % verbose):
                    print(f"Cost after {i} epochs: {cost}")
        return self.parameters

    def predict(self, x, is_normalize=True):
        """
        Returns the predictions after fitting.
        parameters:
            x : input/feature matrix
        Returns:
            predictions : array of predicted target values.
        """
        x = np.array(x, dtype=np.float64)  # converting a list to a numpy array
        if x.ndim == 1:
            x = x.reshape(1, -1)
        if is_normalize:
            # normalize with the mu and sigma computed on the training set
            for n in range(x.shape[1]):
                x[:, n] = (x[:, n] - self.mu[n]) / self.sigma[n]
        x = np.concatenate([np.ones((x.shape[0], 1)), x], axis=1)
        return np.dot(x, self.parameters)
calculate_cost: This method uses the formulas we derived in the previous section to calculate the cost and the gradients for the current parameters. If you go through it carefully, you may notice something odd: earlier we wrote the cost as J(θ) = (1/2m) · Σ (ŷ − y)², but here it is computed as np.dot(error.T, error)/(2*m). The two are equivalent; the dot product of the error vector with itself is just a vectorized way of summing the squared errors.
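As a quick, illustrative check that the vectorized expression matches the summation form (the residuals here are made up):

import numpy as np

error = np.array([1.0, -2.0, 0.5])                 # made-up residuals
m = error.shape[0]
loop_cost = sum(e ** 2 for e in error) / (2 * m)   # summation form of the cost
vector_cost = np.dot(error.T, error) / (2 * m)     # vectorized form used in calculate_cost
assert np.isclose(loop_cost, vector_cost)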
fit: This is the method where the actual magic happens. It first normalizes the features, then adds an extra feature of all 1s for the bias term, and finally keeps iterating: computing the cost and gradients, then updating every parameter simultaneously.

Note: We normalize the features first and only then add the extra feature of 1s for the bias term, because it doesn't make any sense to normalize a feature that contains all 1s (its standard deviation is zero, so normalizing it would divide by zero).
predict: This method first normalizes the input and then uses the optimal parameters found by the fit method to return the predicted target values.

Note: predict uses the same μ and σ that were computed from the training set during the training loop to normalize the input.
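A minimal usage sketch (the numbers here are made up; the real walkthrough is in the notebook):

import numpy as np

X_train = np.array([[1.0, 2.0], [2.0, 1.0], [3.0, 4.0], [4.0, 3.0]])  # made-up features
y_train = np.array([3.0, 3.0, 7.0, 7.0])                              # made-up targets

model = LinearRegression()
model.fit(X_train, y_train, learning_rate=0.1, epochs=500)
print(model.predict([[2.5, 2.5]]))  # raw input; scaled internally with the training μ and σ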
Next, we create an instance of the LinearRegression class and fit this data on it for 500 epochs to get the optimal parameters for our hypothesis. The LinearRegression class has a cost_history property that stores the cost after each iteration; let's plot it:
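A sketch of how that plot can be produced (assuming matplotlib and a fitted model like the one above):

import matplotlib.pyplot as plt

plt.plot(model.cost_history)   # one cost value per epoch
plt.xlabel("Epoch")
plt.ylabel("Cost")
plt.show()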
Next, for polynomial regression, we generate a dataset with make_regression containing one feature and a target column, and then apply a transformation to it to make the data non-linear. The process of fitting and predicting is the same as shown in the previous section; you can also refer to the notebook for better clarity.
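The exact transformation used in the article is in the notebook; as a stand-in, here is one way such a dataset could be generated and bent into a non-linear shape (the cubic term and the degree-3 polynomial features below are purely illustrative assumptions):

import numpy as np
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)
y = y + 0.5 * X[:, 0] ** 3   # illustrative non-linear transformation (not the article's exact one)

# build polynomial features so the same LinearRegression class can fit a curve
X_poly = np.column_stack([X[:, 0] ** p for p in range(1, 4)])

model = LinearRegression()
model.fit(X_poly, y, learning_rate=0.1, epochs=500)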
To add regularization, we don't need to change much in our LinearRegression class. We only need to modify the calculate_cost method, because only this method is responsible for calculating both the cost and the gradients. The modified version is shown below:
class LinearRegression:

    def __init__(self) -> None:
        self.X = None
        self.Y = None
        self.parameters = None
        self.cost_history = []
        self.mu = None
        self.sigma = None

    def calculate_cost(self, lambda_=0):
        """
        Returns the cost and gradients.
        parameters:
            lambda_ : value of the regularization parameter (default is 0)
        Returns:
            cost : calculated loss (scalar).
            gradients : array containing the gradients w.r.t. each parameter
        """
        m = self.X.shape[0]
        y_hat = np.dot(self.X, self.parameters)
        y_hat = y_hat.reshape(-1)
        error = y_hat - self.Y
        # squared-error term plus the L2 penalty on the parameters
        cost = (np.dot(error.T, error) + lambda_ * np.sum(self.parameters ** 2)) / (2 * m)
        gradients = np.zeros(self.X.shape[1])
        for i in range(self.X.shape[1]):
            gradients[i] = np.mean(error * self.X[:, i]) + (lambda_ * self.parameters[i, 0]) / m
        return cost, gradients

    def init_parameters(self):
        """
        Initialize the parameters as an array of 0s.
        parameters: None
        Returns: None
        """
        self.parameters = np.zeros((self.X.shape[1], 1))

    def feature_normalize(self):
        """
        Normalize the samples.
        parameters: None (uses self.X)
        Returns:
            X_norm : normalized X.
        """
        X_norm = self.X.copy()
        mu = np.mean(self.X, axis=0)
        sigma = np.std(self.X, axis=0)
        self.mu = mu
        self.sigma = sigma
        for n in range(self.X.shape[1]):
            X_norm[:, n] = (X_norm[:, n] - mu[n]) / sigma[n]
        return X_norm

    def fit(self, x, y, learning_rate=0.01, epochs=500, lambda_=0, is_normalize=True, verbose=0):
        """
        Iterates and finds the optimal parameters for the input dataset.
        parameters:
            x : input/feature matrix
            y : target matrix
            learning_rate : between 0 and 1 (default is 0.01)
            epochs : number of iterations (default is 500)
            lambda_ : value of the regularization parameter (default is 0)
            is_normalize : boolean, for normalizing features (default is True)
            verbose : print the cost every `verbose` epochs (0 disables printing)
        Returns:
            parameters : array of optimal values of the weights.
        """
        self.X = x
        self.Y = y
        self.cost_history = []
        if self.X.ndim == 1:  # adding an extra dimension, if X is a 1-D array
            self.X = self.X.reshape(-1, 1)
            is_normalize = False
        if is_normalize:
            self.X = self.feature_normalize()
        # add a column of 1s for the bias term (after normalization)
        self.X = np.concatenate([np.ones((self.X.shape[0], 1)), self.X], axis=1)
        self.init_parameters()
        for i in range(epochs):
            cost, gradients = self.calculate_cost(lambda_=lambda_)
            self.cost_history.append(cost)
            # simultaneous update of all parameters
            self.parameters -= learning_rate * gradients.reshape(-1, 1)
            if verbose:
                if not (i % verbose):
                    print(f"Cost after {i} epochs: {cost}")
        return self.parameters

    def predict(self, x, is_normalize=True):
        """
        Returns the predictions after fitting.
        parameters:
            x : input/feature matrix
        Returns:
            predictions : array of predicted target values.
        """
        x = np.array(x, dtype=np.float64)  # converting a list to a numpy array
        if x.ndim == 1:
            x = x.reshape(1, -1)
        if is_normalize:
            # normalize with the mu and sigma computed on the training set
            for n in range(x.shape[1]):
                x[:, n] = (x[:, n] - self.mu[n]) / self.sigma[n]
        x = np.concatenate([np.ones((x.shape[0], 1)), x], axis=1)
        return np.dot(x, self.parameters)
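Written out, the regularized cost and gradients that this calculate_cost computes are (note that, as in the code above, the L2 penalty is applied to every parameter, θ0 included):

J(θ) = (1/2m) · [ Σ (ŷ − y)² + λ · Σ θⱼ² ]

∂J(θ)/∂θⱼ = (1/m) · [ Σ (ŷ − y) · xⱼ + λ · θⱼ ]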
And that's all we need to change in our LinearRegression class. Let's address the previous problem of overfitting in polynomial regression by using a set of values for λ to pick the right one.
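One way to do that, sketched here with made-up candidate values and assuming training/validation splits named X_train, y_train, X_val, y_val, is to fit the model once per λ and compare the errors on the validation data:

import numpy as np

lambdas = [0, 0.01, 0.1, 1, 10, 100]   # candidate values (illustrative)
results = {}

for lam in lambdas:
    model = LinearRegression()
    model.fit(X_train, y_train, learning_rate=0.1, epochs=500, lambda_=lam)
    # evaluate on data the model was not trained on
    val_error = model.predict(X_val).reshape(-1) - y_val
    results[lam] = np.dot(val_error, val_error) / (2 * len(y_val))

best_lambda = min(results, key=results.get)
print(best_lambda, results[best_lambda])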