import numpy as np # import numpy for linear algebra and mathematical operations

def generate_dataset(samples=100, noise=0.2, flipped=0.05, test_size=0.2):
    """Build a noisy two-class, two-feature toy dataset.

    Draws `samples` points per class from unit-variance Gaussians centred at
    (2, 2) and (-2, -2), perturbs every feature with additional Gaussian
    noise scaled by `noise`, and inverts the sign of a `flipped` fraction of
    the labels.

    Args:
        samples: number of points generated for each class.
        noise: multiplier applied to the additional Gaussian feature noise.
        flipped: fraction of all labels whose sign is inverted.
        test_size: accepted for call compatibility; not used by this function.

    Returns:
        Tuple (X, y). X has shape (2 * samples, 2); y has shape
        (2 * samples,) and holds +1.0 / -1.0 labels.
    """
    positive_center = np.array([2, 2])
    negative_center = np.array([-2, -2])

    # The random draws happen in a fixed order (positive cloud, negative
    # cloud, feature jitter, flip indices) so a seeded run is reproducible.
    positive_points = positive_center + np.random.randn(samples, 2)
    negative_points = negative_center + np.random.randn(samples, 2)
    X = np.concatenate((positive_points, negative_points), axis=0)

    # First half of the rows is labelled +1, second half -1.
    y = np.concatenate((np.ones(samples), -np.ones(samples)))

    # Feature noise: an independent Gaussian offset on every coordinate.
    jitter = np.random.randn(*X.shape) * noise
    X += jitter

    # Label noise: pick distinct positions and invert their labels.
    total = len(y)
    flip_count = int(total * flipped)
    flip_index = np.random.choice(total, flip_count, replace=False)
    y[flip_index] = -y[flip_index]

    return X, y

X, y = generate_dataset(noise=0.3) # build the toy dataset with a feature-noise scale of 0.3; all other arguments keep their defaults

def split_data(X, y, test_size=0.2):
    """Randomly partition features and labels into train and test subsets.

    Row indices are shuffled once, then the first `1 - test_size` fraction
    (truncated to an integer count) becomes the training set and the
    remainder becomes the test set. Features and labels are selected with
    the same indices, so each row stays paired with its label.

    Args:
        X: feature array; rows are samples.
        y: label array indexable by the same row indices as X.
        test_size: fraction of rows reserved for the test set.

    Returns:
        Tuple (X_train, X_test, y_train, y_test); the label arrays are
        flattened to one dimension.
    """
    total = X.shape[0]

    # Random ordering of all row indices.
    order = np.arange(total)
    np.random.shuffle(order)

    # Everything before the cut trains, everything from the cut on tests.
    cut = int(total * (1 - test_size))
    train_rows, test_rows = order[:cut], order[cut:]

    X_train, y_train = X[train_rows], y[train_rows].flatten()  # training features / labels
    X_test, y_test = X[test_rows], y[test_rows].flatten()  # testing features / labels

    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = split_data(X, y) # shuffle and split the dataset using split_data's default test_size

def train(X, y, learning_rate=0.001, penalty=1.0, epochs=5000):
    """Fit a linear classifier with per-sample gradient steps.

    Each sample is visited in order, once per epoch. When the sample's
    margin `y * (x . w + b)` is at least 1, only the regularisation term
    (the weights themselves) contributes to the gradient. Otherwise the
    hinge-loss term, scaled by `penalty`, is added for both weights and bias.

    Args:
        X: 2-D feature array; rows are samples.
        y: labels indexable by row number; the updates treat them as
            signed targets (the rest of the file uses +1 / -1).
        learning_rate: step size applied to every update.
        penalty: weight of the hinge-loss term relative to the
            regularisation term.
        epochs: number of full passes over the data.

    Returns:
        Tuple (weights, bias): the learned weight vector (one entry per
        feature) and the scalar bias.
    """
    _, features = X.shape  # also rejects inputs that are not 2-D

    weights = np.zeros(features)
    bias = 0.0

    for _ in range(epochs):
        for row, x_row in enumerate(X):
            target = y[row]
            margin = target * (np.dot(x_row, weights) + bias)

            if margin >= 1:
                # Outside the margin: regularisation gradient only.
                grad_weights, grad_bias = weights, 0
            else:
                # Inside the margin or misclassified: add the hinge term.
                grad_weights = weights - penalty * target * x_row
                grad_bias = -penalty * target

            weights -= learning_rate * grad_weights
            bias -= learning_rate * grad_bias

    return weights, bias

weights, bias = train(X_train, y_train) # fit weights and bias on the training split using train's default learning rate, penalty and epoch count

def predict(X, w, b):
    """Return the predicted class (+1 or -1) for each row of X.

    The decision score is `X . w + b`; its sign is the predicted label.
    A score of exactly zero (a sample lying on the decision boundary) is
    assigned to the +1 class, because 0 is not a valid label and could
    never match a +1 / -1 target.

    Args:
        X: feature array; rows are samples.
        w: weight vector with one entry per feature.
        b: scalar bias.

    Returns:
        Array of predicted labels, one per row of X.
    """
    signs = np.sign(np.dot(X, w) + b)
    # np.sign maps 0 to 0; resolve that tie to the positive class.
    return np.where(signs == 0, 1, signs)

predictions = predict(X_test, weights, bias) # predict labels for the held-out test features

accuracy = np.mean(predictions == y_test) # fraction of test predictions that equal the true labels
print(f"Model Accuracy: {accuracy*100:.2f}%") # print accuracy as a percentage with two decimals

Model Accuracy: 95.00%

import matplotlib.pyplot as plt # import for graphing

# Scatter every sample coloured by its label and overlay the learned boundary.
plt.figure(figsize=(8,8)) # create a square figure
plt.scatter(X[:, 0], X[:, 1], c=y) # plot all samples, coloured by label
x1 = np.linspace(X_test[:,0].min(), X_test[:,0].max(), 200) # first-feature values spanning the test set's range
# The boundary satisfies weights[0]*x1 + weights[1]*x2 + bias = 0; solve for x2.
x2 = -(weights[0] * x1 + bias) / weights[1] # second-feature values on the decision boundary
plt.plot(x1, x2, label="Decision Boundary") # draw the decision boundary line

plt.legend() # add a legend
plt.show() # display the chart

SVM From Scratch¶

Overview¶

Introduction¶

Code¶

Step 1. Generate and Split Data¶

Step 2. Model Training¶

Step 3. Predict Data¶

Step 4. Model Evaluation¶

Author and License¶