"""
Classes to fit item response theory (IRT) models to data, including
lower and upper bounds for ability parameters.
"""
# Faster Confidence Intervals for Item Response Theory
# via an Approximate Likelihood Profile
# Copyright (C) 2021-2022
# Benjamin Paaßen
# German Research Center for Artificial Intelligence
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
__author__ = 'Benjamin Paaßen'
__copyright__ = 'Copyright 2021-2022, Benjamin Paaßen'
__license__ = 'GPLv3'
__version__ = '0.1.0'
__maintainer__ = 'Benjamin Paaßen'
__email__ = 'benjamin.paassen@dfki.de'
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import root_scalar
from scipy.stats import norm
from scipy.stats import chi2


class IRT:
    """ An implementation of basic IRT from which we can inherit.

    Attributes
    ----------
    regul: float (default = 1.)
        The L2 regularization strength C when fitting the IRT model.
    theta_: ndarray
        The learned ability parameters.
    b_: ndarray
        The learned difficulties.

    """

    def __init__(self, regul = 1.):
        self.regul = regul

    def fit(self, X):
        m, n = X.shape
        # extract false and correct answers
        Pos = X > 0.5
        Neg = X < 0.5
        Nan = np.isnan(X)
        # set up IRT objective function for the current dataset
        def irt_obj(theta, b):
            # compute predicted probability
            Z = np.expand_dims(theta, 1) - np.expand_dims(b, 0)
            P = np.zeros_like(Z)
            hi = Z > 10.
            P[hi] = 1.
            mid = np.abs(Z) <= 10.
            P[mid] = 1. / (1. + np.exp(-Z[mid]))
            # compute gradients
            Delta = P - X
            Delta[Nan] = 0.
            thetagrad = np.sum(Delta, 1) + self.regul * theta
            bgrad = -np.sum(Delta, 0) + self.regul * b
            # check for degenerate cases, where the log would return infinity
            if np.any(P[Pos] < 1E-8) or np.any(1. - P[Neg] < 1E-8):
                return np.inf, thetagrad, bgrad
            # compute loss
            l = -np.sum(np.log(P[Pos])) - np.sum(np.log(1. - P[Neg])) + self.regul * 0.5 * (np.sum(np.square(theta)) + np.sum(np.square(b)))
            # return
            return l, thetagrad, bgrad
        # save it as property of this class
        self.irt_obj_ = irt_obj
        # set up an interface for a single parameter vector
        def obj(params):
            theta = params[:m]
            b = params[m:]
            l, thetagrad, bgrad = irt_obj(theta, b)
            return l, np.concatenate((thetagrad, bgrad))
        # call optimize
        res = minimize(obj, np.zeros(m+n), jac = True)
        # retrieve result
        self.theta_ = res.x[:m]
        self.b_ = res.x[m:]
        self.res_ = res
        # return
        return self

    def predict(self, X = None):
        Z = np.expand_dims(self.theta_, 1) - np.expand_dims(self.b_, 0)
        Z[Z < 0.] = 0.
        Z[Z > 0.] = 1.
        return Z

    def predict_proba(self, X = None):
        Z = np.expand_dims(self.theta_, 1) - np.expand_dims(self.b_, 0)
        return 1. / (1. + np.exp(-Z))
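

# A minimal usage sketch for the plain IRT model. The response matrix below is
# made-up illustrative data (rows = learners, columns = items; 1. = correct,
# 0. = wrong, NaN = not answered), not data from the original experiments:
#
#   X = np.array([[1., 1., 0.], [1., 0., np.nan]])
#   model = IRT(regul = 1.).fit(X)
#   print(model.theta_)           # learned ability per learner
#   print(model.b_)               # learned difficulty per item
#   print(model.predict_proba())  # predicted probabilities of correct answers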


class BarrierBounds(IRT):
    """ An implementation of our proposed scheme to identify
    ability bounds. We try to find bounds which increase the
    negative log likelihood by at most a factor of (1 + delta).
    To ensure this, we use a log-barrier method with a barrier
    factor of mu.

    Attributes
    ----------
    regul: float (default = 1.)
        The L2 regularization strength C when fitting the IRT model.
    delta: float (default = 0.01)
        The factor by which we are allowed to exceed the minimum
        negative log likelihood.
    absolute_bound: float (default = None)
        If an absolute bound is desired instead of a relative bound.
        This is mostly relevant to remain comparable to the Likelihood
        Profile method.
    mu: float (default = 0.01)
        The factor for the log barrier term in the loss.
    theta_: ndarray
        The learned ability parameters.
    b_: ndarray
        The learned difficulties.
    theta_min_: ndarray
        The lower bounds for theta.
    theta_max_: ndarray
        The upper bounds for theta.

    """

    def __init__(self, regul = 1., delta = 0.01, absolute_bound = None, mu = 0.01):
        super(BarrierBounds, self).__init__(regul)
        self.delta = delta
        self.absolute_bound = absolute_bound
        self.mu = mu

    def fit(self, X):
        # fit the standard IRT model
        super(BarrierBounds, self).fit(X)
        m, n = X.shape
        # generate mu to ensure that the bounds will be valid
        mu = self.mu * min(1, self.delta * self.res_.fun / np.max(np.abs(self.res_.jac[:m])))
        # set up loss bound
        if self.absolute_bound is None:
            loss_bound = (1. + self.delta) * self.res_.fun
        else:
            loss_bound = self.res_.fun + self.absolute_bound
        # set up initial parameters
        init_params = np.concatenate((self.theta_, self.b_))
        # start identifying bounds for the ability parameters
        self.theta_min_ = np.zeros(m)
        self.theta_max_ = np.zeros(m)
        for i in range(m):
            # construct objective for lower bound
            def obj(params):
                theta = params[:m]
                b = params[m:]
                # compute regular gradient
                l, thetagrad, bgrad = self.irt_obj_(theta, b)
                # check loss delta for a degenerate case where the log would be infinite
                loss_delta = loss_bound - l
                if loss_delta <= 0.:
                    return np.inf, np.concatenate((thetagrad, bgrad))
                # compute loss for the current problem
                loss = +theta[i] - mu * np.log(loss_delta)
                # compute gradients
                thetagrad = mu / loss_delta * thetagrad
                thetagrad[i] += 1.
                bgrad = mu / loss_delta * bgrad
                # return
                return loss, np.concatenate((thetagrad, bgrad))
            # call optimize
            res = minimize(obj, init_params, method = 'L-BFGS-B', jac = True, options = {'maxls' : 100 })
            # retrieve result
            self.theta_min_[i] = res.x[i]
            # construct objective for upper bound
            def obj(params):
                theta = params[:m]
                b = params[m:]
                # compute regular gradient
                l, thetagrad, bgrad = self.irt_obj_(theta, b)
                # check loss delta for a degenerate case where the log would be infinite
                loss_delta = loss_bound - l
                if loss_delta <= 0.:
                    return np.inf, np.concatenate((thetagrad, bgrad))
                # compute loss for the current problem
                loss = -theta[i] - mu * np.log(loss_delta)
                # compute gradients
                thetagrad = mu / loss_delta * thetagrad
                thetagrad[i] -= 1.
                bgrad = mu / loss_delta * bgrad
                # return
                return loss, np.concatenate((thetagrad, bgrad))
            res = minimize(obj, init_params, method = 'L-BFGS-B', jac = True, options = {'maxls' : 100 })
            # retrieve result
            self.theta_max_[i] = res.x[i]
        return self
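

# A minimal usage sketch for BarrierBounds on the same kind of made-up data;
# delta = 0.05 is an illustrative choice that allows the negative log
# likelihood to grow by at most 5 % at the reported bounds:
#
#   X = np.array([[1., 1., 0.], [1., 0., np.nan]])
#   bb = BarrierBounds(delta = 0.05).fit(X)
#   print(bb.theta_min_)  # lower ability bound per learner
#   print(bb.theta_max_)  # upper ability bound per learner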


class AOBounds(IRT):
    """ An implementation of our proposed scheme to identify
    ability bounds. We try to find bounds which increase the
    negative log likelihood by at most a factor of (1 + delta).
    To ensure this, we use a special solver for alternating
    optimization where we only change theta[i] and then all other
    parameters.

    Attributes
    ----------
    regul: float (default = 1.)
        The L2 regularization strength C when fitting the IRT model.
    delta: float (default = 0.01)
        The factor by which we are allowed to exceed the minimum
        negative log likelihood.
    absolute_bound: float (default = None)
        If an absolute bound is desired instead of a relative bound.
        This is mostly relevant to remain comparable to the Likelihood
        Profile method.
    num_iterations: int (default = 4)
        The number of alternating optimization iterations used to search
        for the bounds.
    theta_: ndarray
        The learned ability parameters.
    b_: ndarray
        The learned difficulties.
    theta_min_: ndarray
        The lower bounds for theta.
    theta_max_: ndarray
        The upper bounds for theta.

    """

    def __init__(self, regul = 1., delta = 0.01, absolute_bound = None, num_iterations = 4):
        super(AOBounds, self).__init__(regul)
        self.delta = delta
        self.absolute_bound = absolute_bound
        self.num_iterations = num_iterations

    def find_root_with_ao_(self, X, Pos, Neg, Nan, i, theta_i_init, li, lnoti, loss_bound):
        """ This function implements a custom root solver which
        ensures that lnoti + li becomes almost exactly equal to the
        loss bound.

        """
        m, n = X.shape
        theta = self.theta_
        b = self.b_
        theta_i = theta_i_init
        for t in range(self.num_iterations):
            # compute the current loss that is due to theta[i]
            # but only in iterations larger than zero because the initial
            # value is already given in the parent function
            if t > 0:
                # compute predicted probability
                Z = np.expand_dims(theta, 1) - np.expand_dims(b, 0)
                P = np.zeros_like(Z)
                hi = Z > 10.
                P[hi] = 1.
                mid = np.abs(Z) <= 10.
                P[mid] = 1. / (1. + np.exp(-Z[mid]))
                # compute loss
                l = -np.sum(np.log(P[Pos])) - np.sum(np.log(1. - P[Neg])) + self.regul * 0.5 * (np.sum(np.square(theta)) + np.sum(np.square(b)))
                # compute loss for i only
                li = -np.sum(np.log(P[i, Pos[i, :]])) - np.sum(np.log(1. - P[i, Neg[i, :]])) + self.regul * 0.5 * theta[i] ** 2
                lnoti = l - li
            # find the theta_i value that yields exactly the loss
            # bound, provided that we don't change the other parameters.
            # This will be an overestimate of the actual loss we can
            # achieve for this theta_i value
            # set up the objective function for root finding
            def root_obj(theta_i):
                # compute predicted probability
                z = theta_i - b
                p = np.zeros_like(z)
                hi = z > 10.
                p[hi] = 1.
                mid = np.abs(z) <= 10.
                p[mid] = 1. / (1. + np.exp(-z[mid]))
                # compute gradients
                delta = p - X[i, :]
                delta[Nan[i, :]] = 0.
                thetagrad = np.sum(delta) + self.regul * theta_i
                # check for degenerate cases, where the log would return infinity
                if np.any(p[Pos[i, :]] < 1E-8) or np.any(1. - p[Neg[i, :]] < 1E-8):
                    return np.inf, thetagrad
                # compute loss
                li = -np.sum(np.log(p[Pos[i, :]])) - np.sum(np.log(1. - p[Neg[i, :]])) + self.regul * 0.5 * theta_i ** 2
                l = lnoti + li
                # return
                return l - loss_bound, thetagrad
            # start root finding process
            res = root_scalar(root_obj, x0 = theta_i, fprime = True)
            # the root is our new theta_i value
            theta_i = res.root
            # avoid another costly L-BFGS-B optimization if we
            # are in the last iteration
            if t == self.num_iterations - 1:
                break
            # optimize the negative log likelihood with all other
            # parameters except theta[i]
            def obj(params):
                theta = params[:m]
                theta[i] = theta_i
                b = params[m:]
                # compute regular gradient
                l, thetagrad, bgrad = self.irt_obj_(theta, b)
                thetagrad[i] = 0.
                return l, np.concatenate((thetagrad, bgrad))
            # call optimize
            res = minimize(obj, np.concatenate((theta, b)), method = 'L-BFGS-B', jac = True, options = {'maxls' : 100 })
            theta = res.x[:m]
            theta[i] = theta_i
            b = res.x[m:]
        return theta_i

    def fit(self, X):
        # fit the standard IRT model
        super(AOBounds, self).fit(X)
        m, n = X.shape
        # set up loss bound
        if self.absolute_bound is None:
            loss_bound = (1. + self.delta) * self.res_.fun
        else:
            loss_bound = self.res_.fun + self.absolute_bound
        # compute the loss separately for all i
        Pos = X > 0.5
        Neg = X < 0.5
        Nan = np.isnan(X)
        # compute predicted probability
        Z = np.expand_dims(self.theta_, 1) - np.expand_dims(self.b_, 0)
        P = np.zeros_like(Z)
        hi = Z > 10.
        P[hi] = 1.
        mid = np.abs(Z) <= 10.
        P[mid] = 1. / (1. + np.exp(-Z[mid]))
        # compute loss
        L = np.zeros(m)
        for i in range(m):
            L[i] = -np.sum(np.log(P[i, Pos[i, :]])) - np.sum(np.log(1. - P[i, Neg[i, :]]))
        L = L + self.regul * 0.5 * np.square(self.theta_)
        # start identifying bounds for the ability parameters
        self.theta_min_ = np.zeros(m)
        self.theta_max_ = np.zeros(m)
        for i in range(m):
            # start search for lower bound.
            self.theta_min_[i] = self.find_root_with_ao_(X, Pos, Neg, Nan, i, self.theta_[i] - 1, L[i], self.res_.fun - L[i], loss_bound)
            # start search for upper bound.
            self.theta_max_[i] = self.find_root_with_ao_(X, Pos, Neg, Nan, i, self.theta_[i] + 1, L[i], self.res_.fun - L[i], loss_bound)
        return self
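

# A minimal usage sketch for AOBounds (again with made-up data); num_iterations
# sets how many alternating rounds of root finding and re-optimization are run:
#
#   X = np.array([[1., 1., 0.], [1., 0., np.nan]])
#   ao = AOBounds(delta = 0.05, num_iterations = 4).fit(X)
#   print(ao.theta_min_, ao.theta_max_)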


class LikelihoodProfile(IRT):
    """ This is an implementation of the Likelihood profile
    estimation method proposed by Chalmers, Pek, & Liu (2017).
    doi:10.1080/00273171.2017.1329082.
    This method first trains a vanilla IRT model and then
    performs a separate optimization for each ability parameter,
    where we look for the value which is as small/large as possible
    such that a likelihood ratio test would not reject the null
    hypothesis at the desired confidence level.
    This is equivalent to minimizing/maximizing the parameter
    value such that twice the difference between the log likelihood
    and the optimal log likelihood is equal to the chi-square
    quantile for the desired confidence level.

    Attributes
    ----------
    regul: float (default = 1.)
        The L2 regularization strength C when fitting the IRT model.
    alpha: float (default = .95)
        The desired confidence level.
    theta_: ndarray
        The learned ability parameters.
    b_: ndarray
        The learned difficulties.
    theta_min_: ndarray
        The lower bounds for theta.
    theta_max_: ndarray
        The upper bounds for theta.

    """

    def __init__(self, regul = 1., alpha = .95):
        super(LikelihoodProfile, self).__init__(regul)
        self.alpha = alpha

    def fit(self, X):
        # fit the standard IRT model
        super(LikelihoodProfile, self).fit(X)
        m, n = X.shape
        # set up an upper bound for the loss via the chi square
        # quantile function
        bound = 2*self.res_.fun + chi2.ppf(self.alpha, df = 1)
        # set up initial parameters
        init_params = np.concatenate((self.theta_, self.b_))
        # start identifying bounds for the ability parameters
        self.theta_min_ = np.zeros(m)
        self.theta_max_ = np.zeros(m)
        for i in range(m):
            # construct objective for root finding
            def root_obj(theta_i):
                # construct inner objective for maximizing the
                # likelihood at fixed theta_i
                def obj(params):
                    theta = params[:m]
                    theta[i] = theta_i
                    b = params[m:]
                    # compute regular gradient
                    l, thetagrad, bgrad = self.irt_obj_(theta, b)
                    thetagrad[i] = 0.
                    return l, np.concatenate((thetagrad, bgrad))
                # perform inner optimization
                res = minimize(obj, init_params, method = 'L-BFGS-B', jac = True, options = {'maxls' : 100 })
                # compute loss for root finding
                root_loss = 2*res.fun - bound
                # compute derivative for root finding
                theta = res.x[:m]
                theta[i] = theta_i
                b = res.x[m:]
                _, thetagrad, _ = self.irt_obj_(theta, b)
                return root_loss, 2*thetagrad[i]
            # perform root finding process for lower bound, starting
            # well below the optimal value
            res = root_scalar(root_obj, x0 = self.theta_[i] - 1, fprime = True)
            self.theta_min_[i] = res.root
            # perform root finding process for upper bound, starting
            # well above the optimal value
            res = root_scalar(root_obj, x0 = self.theta_[i] + 1, fprime = True)
            self.theta_max_[i] = res.root
        return self
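

# A minimal usage sketch for LikelihoodProfile (made-up data); alpha is the
# desired confidence level of the likelihood-ratio-based interval:
#
#   X = np.array([[1., 1., 0.], [1., 0., np.nan]])
#   lp = LikelihoodProfile(alpha = .95).fit(X)
#   print(lp.theta_min_, lp.theta_max_)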


class WaldBounds(IRT):
    """ This is an implementation of the Wald method for
    confidence bounds as explained in Chalmers, Pek, & Liu (2017).
    doi:10.1080/00273171.2017.1329082.
    This method first trains a vanilla IRT model and then
    uses the BFGS estimate of the inverse Hessian to estimate
    where the negative log likelihood would exceed a certain bound.

    Attributes
    ----------
    regul: float (default = 1.)
        The L2 regularization strength C when fitting the IRT model.
    alpha: float (default = .95)
        The desired confidence level.
    theta_: ndarray
        The learned ability parameters.
    b_: ndarray
        The learned difficulties.
    theta_min_: ndarray
        The lower bounds for theta.
    theta_max_: ndarray
        The upper bounds for theta.

    """

    def __init__(self, regul = 1., alpha = .95):
        super(WaldBounds, self).__init__(regul)
        self.alpha = alpha

    def fit(self, X):
        # fit the standard IRT model
        super(WaldBounds, self).fit(X)
        m, n = X.shape
        # infer standard deviations for parameters based on the
        # square root of the diagonal of the inverse Hessian
        stds = np.sqrt(np.diag(self.res_.hess_inv)[:m])
        # multiply standard deviations with Gaussian quantile
        scalar = norm.ppf(1. - .5 * (1. - self.alpha))
        self.theta_min_ = self.theta_ - scalar * stds
        self.theta_max_ = self.theta_ + scalar * stds
        return self
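

# A small, self-contained demo sketch: it fits all four bound estimators above
# to a made-up response matrix and prints the resulting ability intervals.
# The matrix X_demo and the default hyperparameters are illustrative
# assumptions, not values taken from the original experiments.
if __name__ == '__main__':
    # rows = learners, columns = items; 1. = correct, 0. = wrong, NaN = missing
    X_demo = np.array([
        [1., 1., 1., 0.],
        [1., 0., 0., 0.],
        [1., 1., np.nan, 0.],
    ])
    for Model in (BarrierBounds, AOBounds, LikelihoodProfile, WaldBounds):
        model = Model().fit(X_demo)
        print(Model.__name__)
        for i in range(X_demo.shape[0]):
            print('  theta[%d] = %+.3f, bounds [%+.3f, %+.3f]'
                  % (i, model.theta_[i], model.theta_min_[i], model.theta_max_[i]))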