-
Notifications
You must be signed in to change notification settings - Fork 2
/
select_parameter.py
85 lines (69 loc) · 2.87 KB
/
select_parameter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import numpy as np
from implementations import *
from helpers import *
from process_data import *
from crossvalidation import *
def select_parameters_ridge_regression_jet(y, tX, degrees, lambdas, alphas, k_fold, seed):
    """
    Select the best (degree, lambda, alpha) tuple for each jet subset.

    The training data is partitioned into subsets according to the jet value
    (via get_jet_masks); for each subset a cross-validated grid search over
    the supplied parameter candidates is run.

    Returns four parallel lists, one entry per jet subset: the best degrees,
    the best lambdas, the best alphas, and the corresponding accuracies.
    """
    best_degrees, best_lambdas, best_alphas, best_accuracies = [], [], [], []
    # One boolean mask per jet value; each mask selects one subset of samples.
    for mask in get_jet_masks(tX):
        sub_x = tX[mask]
        sub_y = y[mask]
        degree, lamb, alpha, accuracy = select_parameters_ridge_regression(
            degrees, lambdas, alphas, k_fold, sub_y, sub_x, seed)
        best_degrees.append(degree)
        best_lambdas.append(lamb)
        best_alphas.append(alpha)
        best_accuracies.append(accuracy)
    return best_degrees, best_lambdas, best_alphas, best_accuracies
def select_parameters_ridge_regression(degrees, lambdas, alphas, k_fold, y, tx, seed):
    """
    Grid-search (degree, lambda, alpha) by k-fold cross-validated accuracy.

    For every combination of the candidate parameters, runs k-fold cross
    validation with ridge regression and records the mean test accuracy.

    Parameters:
        degrees, lambdas, alphas: iterables of candidate parameter values.
        k_fold: number of cross-validation folds.
        y, tx: labels and feature matrix of the training set.
        seed: random seed used to build the fold indices.

    Returns:
        (best_degree, best_lamb, best_alpha, accuracy) for the combination
        with the highest mean test accuracy. Parameter values are returned
        with their original types (the previous implementation stored them
        in a float ndarray, silently coercing integer degrees to float).

    Raises:
        ValueError: if any of the candidate iterables is empty.
    """
    # Fold indices are built once and reused for every parameter combination,
    # so all combinations are compared on identical train/test splits.
    k_indices = build_k_indices(y, k_fold, seed)
    best = None  # (degree, lamb, alpha, mean test accuracy) of the best combination so far
    for degree in degrees:
        for lamb in lambdas:
            for alpha in alphas:
                accs_test = []
                for k in range(k_fold):
                    # cross_validation returns (train accuracy, test accuracy);
                    # only the test accuracy drives model selection.
                    _, acc_test = cross_validation(y, tx, ridge_regression, k_indices, k, degree, alpha, lamb)
                    accs_test.append(acc_test)
                mean_acc = np.mean(accs_test)
                # Strict '>' keeps the first maximum on ties, matching the
                # original np.argmax tie-breaking behavior.
                if best is None or mean_acc > best[3]:
                    best = (degree, lamb, alpha, mean_acc)
    if best is None:
        raise ValueError("empty parameter grid: degrees, lambdas and alphas must be non-empty")
    return best
def accuracy_per_parameters(degrees, lambdas, alphas, k_fold, y, tX, jet, seed=10):
    """
    Compute cross-validated train/test accuracies over a parameter grid
    for one jet subset.

    Parameters:
        degrees, lambdas, alphas: iterables of candidate parameter values.
        k_fold: number of cross-validation folds.
        y, tX: labels and feature matrix of the full training set.
        jet: index of the jet subset to evaluate.
        seed: random seed for the fold split (default 10, previously
              hard-coded; made a parameter for reproducible experiments).

    Returns:
        (accu_tr, accu_te): parallel lists of mean train and test accuracies,
        one entry per (degree, lamb, alpha) combination in iteration order.
    """
    # Restrict the data to the samples belonging to the requested jet subset.
    msk_jets = get_jet_masks(tX)
    tX = tX[msk_jets[jet]]
    y = y[msk_jets[jet]]
    # Build the fold indices once so every combination sees the same splits.
    k_indices = build_k_indices(y, k_fold, seed)
    accu_te = []
    accu_tr = []
    for degree in degrees:
        for lamb in lambdas:
            for alpha in alphas:
                accs_test = []
                accs_train = []
                for k in range(k_fold):
                    acc_train, acc_test = cross_validation(y, tX, ridge_regression, k_indices, k, degree, alpha, lamb)
                    accs_test.append(acc_test)
                    accs_train.append(acc_train)
                accu_te.append(np.mean(accs_test))
                accu_tr.append(np.mean(accs_train))
    return accu_tr, accu_te