-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
37 lines (34 loc) · 1.38 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import joblib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from skimage.feature import hog
from sklearn.svm import LinearSVC
import numpy as np
from matrix_confusion import plot_confusion_matrix_train
from sklearn.metrics import accuracy_score
# Train a linear SVM digit classifier on HOG descriptors of the OpenML
# "mnist_784" images, save the fitted model to "digits_cls.pkl", then print
# the held-out accuracy and plot the confusion matrix.

# Load the MNIST dataset from OpenML as arrays (as_frame=False) rather than
# a DataFrame. Targets are cast to int so they can be used as numeric labels.
mnist = datasets.fetch_openml("mnist_784", as_frame=False, parser='liac-arff')
# astype() already allocates a fresh array, so asarray avoids a second copy.
features = np.asarray(mnist.data.astype(int))
labels = np.asarray(mnist.target.astype(int))

# Histogram of Oriented Gradients: one descriptor per image.
# Each flat feature row is reshaped to a 28x28 image before extraction.
# visualize=False returns only the descriptor; the rendered HOG image that
# visualize=True additionally produces was never used here, so computing it
# for every sample was wasted work.
list_hog_fd = [
    hog(
        feature.reshape((28, 28)),
        orientations=9,
        pixels_per_cell=(14, 14),
        cells_per_block=(1, 1),
        visualize=False,
    )
    for feature in features
]
hog_features = np.array(list_hog_fd, 'float64')

# Split the dataset into training and testing sets (80/20, fixed seed so
# the reported metrics are reproducible between runs).
X_train, X_test, y_train, y_test = train_test_split(
    hog_features, labels, test_size=0.2, random_state=42)

# Train the Linear SVM with dual set to False (solve the primal problem;
# scikit-learn recommends this when n_samples > n_features).
clf = LinearSVC(dual=False)
clf.fit(X_train, y_train)
predicted = clf.predict(X_test)

# Save the classifier
joblib.dump(clf, "digits_cls.pkl", compress=3)

# accuracy percentage
print("accuracy_score: ")
print(accuracy_score(y_test, predicted))

# Compute and display the confusion matrix for the test set
plot_confusion_matrix_train(y_test, predicted)