/
ft_statfun_roc.m
162 lines (141 loc) · 5.42 KB
/
ft_statfun_roc.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
function [s, cfg] = ft_statfun_roc(cfg, dat, design)
% FT_STATFUN_ROC computes the area under the curve (AUC) of the Receiver Operator
% Characteristic (ROC). This is a measure of the separability of the data observed in
% two conditions. The AUC can be used for statistical testing whether the two
% conditions can be distinguished on the basis of the data.
%
% Use this function by calling one of the high-level statistics functions as
% [stat] = ft_timelockstatistics(cfg, timelock1, timelock2, ...)
% [stat] = ft_freqstatistics(cfg, freq1, freq2, ...)
% [stat] = ft_sourcestatistics(cfg, source1, source2, ...)
% with the following configuration option
% cfg.statistic = 'ft_statfun_roc'
%
% The experimental design is specified as:
% cfg.ivar = independent variable, row number of the design that contains the labels of the conditions to be compared (default=1)
%
% The labels for the independent variable should be specified as the number 1 and 2.
%
% Note that this statfun performs a one sided test in which condition "1" is assumed
% to be larger than condition "2". This function does not compute an analytic
% probability of condition "1" being larger than condition "2", but can be used in a
% randomization test, including clustering.
%
% A low-level example with 10 channel-time-frequency points and 1000 observations per
% condition goes like this:
% dat1 = randn(10,1000) + 1;
% dat2 = randn(10,1000);
% design = [1*ones(1,1000) 2*ones(1,1000)];
% stat = ft_statfun_roc([], [dat1 dat2], design);
%
% See also FT_TIMELOCKSTATISTICS, FT_FREQSTATISTICS or FT_SOURCESTATISTICS
% Copyright (C) 2008, Robert Oostenveld
%
% This file is part of FieldTrip, see http://www.fieldtriptoolbox.org
% for the documentation and details.
%
% FieldTrip is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% FieldTrip is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with FieldTrip. If not, see <http://www.gnu.org/licenses/>.
%
% $Id$
% set the defaults
cfg.ivar = ft_getopt(cfg, 'ivar', 1);
% on-the-fly log transformation is not supported any more, please use FT_MATH instead
cfg = ft_checkconfig(cfg, 'forbidden', 'logtransform');
cfg = ft_checkconfig(cfg, 'forbidden', 'numbins');
% start with a quick test to see whether there appear to be NaNs
if any(isnan(dat(1,:)))
% exclude trials that contain NaNs for all observed data points
sel = all(isnan(dat),1);
dat = dat(:,~sel);
design = design(:,~sel);
end
% logical indexing is faster than using find(...)
selA = (design(cfg.ivar,:)==1);
selB = (design(cfg.ivar,:)==2);
% select the data in the two classes
datA = dat(:, selA);
datB = dat(:, selB);
nobs = size(dat,1);
na = size(datA,2);
nb = size(datB,2);
auc = zeros(nobs, 1);
for k = 1:nobs
% compute the area under the curve for each channel/time/frequency
a = datA(k,:);
b = datB(k,:);
% to speed up the AUC, the critical value is determined by the actual
% values in class B, which also ensures a regular sampling of the False Alarms
b = sort(b);
ca = zeros(nb+1,1);
ib = zeros(nb+1,1);
% cb = zeros(nb+1,1);
% ia = zeros(nb+1,1);
for i=1:nb
% for the first approach below, the critval could also be choosen based on e.g. linspace(min,max,n)
critval = b(i);
% for each of the two distributions, determine the number of correct and incorrect assignments given the critical value
% ca(i) = sum(a>=critval);
% ib(i) = sum(b>=critval);
% cb(i) = sum(b<critval);
% ia(i) = sum(a<critval);
% this is a much faster approach, which works due to using the sorted values in b as the critical values
ca(i) = sum(a>=critval); % correct assignments to class A
ib(i) = nb-i+1; % incorrect assignments to class B
end
% add the end point
ca(end) = 0;
ib(end) = 0;
% cb(end) = nb;
% ia(end) = na;
hits = ca/na;
fa = ib/nb;
% the numerical integration is faster if the points are sorted
hits = fliplr(hits);
fa = fliplr(fa);
if false
% this part is optional and should only be used when exploring the data
figure
plot(fa, hits, '.-')
xlabel('false positive');
ylabel('true positive');
title('ROC-curve');
end
% compute the area under the curve using numerical integration
auc(k) = numint(fa, hits);
end
% return the area under the curve as the statistic of interest
s.stat = auc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NUMINT computes a numerical integral of a set of sampled points using
% linear interpolation. Alugh the algorithm works for irregularly sampled points
% along the x-axis, it will perform best for regularly sampled points
%
% Use as
% z = numint(x, y)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function z = numint(x, y)
if ~all(diff(x)>=0)
% ensure that the points are sorted along the x-axis
[x, i] = sort(x);
y = y(i);
end
n = length(x);
z = 0;
for i=1:(n-1)
x0 = x(i);
y0 = y(i);
dx = x(i+1)-x(i);
dy = y(i+1)-y(i);
z = z + (y0 * dx) + (dy*dx/2);
end