/
ft_nonlinearassociation.m
372 lines (324 loc) · 13.7 KB
/
ft_nonlinearassociation.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
function [association] = ft_nonlinearassociation(cfg, data)
% NONLINEARASSOCIATION calculate the association coefficient as a
% function of delay.
%
% In order to estimate the amount of association between all possible
% pairs of MEG sensors the nonlinear association analysis is used.
% It was first developed for EEG data analysis by Pijn and co-workers
% (Lopes da Silva, et al. 1989; Pijn, et al. 1990). The basic principle
% is similar to that of coherence and (cross) correlation, with the
% exception that this nonlinear association method can be applied
% independent of the type of relationship (linear or nonlinear) in
% the data.
%
% The method is based on the idea that if two signals x and y are
% correlated, a nonlinear regression curve can be calculated that
% represents their relationship. In practice, that regression curve
% is estimated by creating a scatterplot of y versus x, dividing the
% data in segments and describing each segment with a linear regression
% curve. The estimated correlation ratio h2, which gives the reduction
% in variance of y as a result of predicting its values according to
% the regression curve, can be calculated as follows:
%
% h^2 = (sum(Yi - mean(Y))^2 - sum(Yi - f(Xi))^2) / sum(Yi - mean(Y))^2
%
% With the sum going over N samples and f(Xi) the estimated value of
% Yi according to the regression line. The h2 coefficient has values
% between 0 (y is completely independent of x) and 1 (y is completely
% determined by x). In the case of a linear relationship between x
% and y, h2 is equal to the well known Pearson correlation coefficient
% (r2). As is the case with cross-correlation, it is possible to
% estimate h2 as a function of time shift () between the signals. The
% h2 is then iteratively calculated for different values of , by
% shifting the signals in comparison to each other, and the value for
% which the maximal h2 is reached can be used as an estimate of the
% time lag between both signals. In deciding what epoch length to use
% in the association analysis, a trade-off has to be made between
% successfully determining the correct delay and h2-value (for which
% large epoch lengths are necessary) and a small enough time-resolution
% (for which small epoch lengths are necessary).
%
% Use as
% [association] = ft_nonlinearassociation(cfg, data)
%
% The input data should be organised in a structure as obtained from
% the PREPROCESSING function.
%
% The configuration should contain
% cfg.channel = Nx1 cell-array with selection of channels (default = 'all'), see CHANNELSELECTION for details
% cfg.keeptrials = 'yes' or 'no', process the individual trials or the concatenated data (default = 'no')
% cfg.trials = 'all' or a selection given as a 1xN vector (default = 'all')
% cfg.fsample = 1200
% cfg.maxdelay = 32/cfg.fsample
% cfg.delaystep = 2/cfg.fsample
% cfg.nr_bins = 7
% cfg.offset = 0
% cfg.order = 'Hxy'
% cfg.timwin = 0.2
% cfg.toi = []
%
% References
% - Lopes da Silva F, Pijn JP, Boeijinga P. (1989): Interdependence of
% EEG signals: linear vs. nonlinear associations and the significance
% of time delays and phase shifts. Brain Topogr 2(1-2):9-18.
% - Pijn JP, Vijn PC, Lopes da Silva FH, Van Ende Boas W, Blanes W.
% (1990): Localization of epileptogenic foci using a new signal
% analytical approach. Neurophysiol Clin 20(1):1-11.
% Copyright (C) 2007, Inge Westmijse
ft_defaults
% set the defaults
if ~isfield(cfg, 'trials'), cfg.trials = 'all'; end
if ~isfield(cfg, 'keeptrials'), cfg.keeptrials = 'no'; end
if ~isfield(cfg, 'channel'), cfg.channel = 'all'; end
if ~isfield(cfg, 'toi'), cfg.toi = []; end
if ~isfield(cfg, 'timwin'), cfg.timwin = 0.2; end
if ~isfield(cfg, 'fsample'), cfg.fsample = 1200; end
if ~isfield(cfg, 'delaystep'), cfg.delaystep = 2/cfg.fsample; end
if ~isfield(cfg, 'maxdelay'), cfg.maxdelay = 32/cfg.fsample; end
if ~isfield(cfg, 'offset'), cfg.offset = 0; end
if ~isfield(cfg, 'nr_bins'), cfg.nr_bins = 7; end
if ~isfield(cfg, 'order'), cfg.order = 'Hxy'; end
if ~isfield(cfg, 'feedback'), cfg.feedback = 'textbar'; end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% do some bookkeeping on the data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% select trials of interest
if ~strcmp(cfg.trials, 'all')
if islogical(cfg.trials), cfg.trials=find(cfg.trials); end
fprintf('selecting %d trials\n', length(cfg.trials));
data.trial = data.trial(cfg.trials);
data.time = data.time(cfg.trials);
end
Ntrials = numel(data.trial);
% select channels of interest
cfg.channel = channelselection(cfg.channel, data.label);
chansel = match_str(data.label, cfg.channel);
fprintf('selecting %d channels\n', length(chansel));
for trial=1:Ntrials
data.trial{trial} = data.trial{trial}(chansel,:);
end
data.label = data.label(chansel);
Nchans = length(chansel);
% determine the size of each trial, they can be variable length
Nsamples = zeros(1,Ntrials);
for trial=1:Ntrials
Nsamples(trial) = size(data.trial{trial},2);
end
if strcmp(cfg.keeptrials, 'no')
% concatenate all the data into a 2D matrix
fprintf('concatenating data');
dat = zeros(Nchans, sum(Nsamples));
for trial=1:Ntrials
fprintf('.');
begsample = sum(Nsamples(1:(trial-1))) + 1;
endsample = sum(Nsamples(1:trial));
dat(:,begsample:endsample) = data.trial{trial};
end
fprintf('\n');
fprintf('concatenated data matrix size %dx%d\n', size(dat,1), size(dat,2));
time = [1/cfg.fsample : size(dat,1)/cfg.fsample : size(dat,1)];
data.trial = {dat};
data.time = {time};
Ntrials = 1;
else
% replace the time axis, since shifted time-axes over trials are not supported
for i = 1:Ntrials
data.time{i} = [1/cfg.fsample : 1/cfg.fsample : size(data.trial{i},2) / cfg.fsample];
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% prepare all data selection
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% item 1: channel combinations
numchannelcmb = Nchans*(Nchans-1)/2;
channelcmb = zeros(numchannelcmb, 2);
k = 1;
if (strcmp(cfg.order, 'Hxy'))
for i=1:Nchans
for j=(i+1):Nchans
channelcmb(k, :) = [i j];
k = k+1;
end
end
elseif (strcmp(cfg.order, 'Hyx'))
for i = Nchans:-1:1
for j = Nchans : -1:(i+1)
channelcmb(k,:) = [i j];
k = k+1;
end
end
end
numchannelcmb = size(channelcmb,1);
% item 2: time windows
% this requires
% cfg.toi = [t1 t2 t3 t4] center time of each window
% cfg.timwin = 0.2 in seconds
% TODO implement "time, offset, fsample"
for j = 1:Ntrials
time = data.time{j};
numtimwin = size(cfg.toi,2); % initial guess, not all might fit in this trial
timwin = zeros(numtimwin,2);
sel = zeros(numtimwin,1);
for i=1:numtimwin
timwin(i,1) = cfg.toi(i) - cfg.timwin/2; % begin of time window
timwin(i,2) = cfg.toi(i) + cfg.timwin/2; % end of time window
sel(i) = timwin(i,1)>=time(1) && timwin(i,2)<=time(end); % does it fit in?
end
timwin = timwin(find(sel==1),:);
toi_mat{j} = cfg.toi(find(sel == 1)); % update the configuration
timwin_mat{j} = round((timwin - cfg.offset) * cfg.fsample); % convert to samples
end
timwin = timwin_mat;
cfg.toi = toi_mat;
% item 3: delays within each timewindow
% this requires
% cfg.delaystep
% cfg.maxdelay
delay = -cfg.maxdelay:cfg.delaystep:cfg.maxdelay;
numdelay = length(delay);
% convert to samples
delay = round(delay * cfg.fsample);
if strcmp(cfg.keeptrials, 'yes')
for trllop=1:Ntrials
association.trial(trllop) = Do_Association_Calculation( trllop , Ntrials , cfg , timwin , numchannelcmb , numdelay , data , channelcmb , delay );
end % for each trial
else
trllop = 1;
association = Do_Association_Calculation( trllop , Ntrials , cfg , timwin , numchannelcmb , numdelay , data , channelcmb , delay );
end
% add the version details of this function call to the configuration
try
% get the full name of the function
cfg.version.name = mfilename('fullpath');
catch
% required for compatibility with Matlab versions prior to release 13 (6.5)
[st, i] = dbstack;
cfg.version.name = st(i);
end
cfg.version.id = '$Id$';
% remember the configuration details of the input data
try, cfg.previous = data.cfg; end
% remember the exact configuration details in the output
association.cfg = cfg;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SUBFUNCTION that does the computation for each trial
function [association] = Do_Association_Calculation( trllop , Ntrials , cfg , timwin , numchannelcmb , numdelay , data , channelcmb , delay )
fprintf('\nprocessing trial %d from %d\n', trllop, Ntrials);
association = [];
numtimwin = size(timwin{trllop},1);
h2 = zeros(numchannelcmb, numtimwin, numdelay);
l = 0;
maxl = numchannelcmb*numtimwin*numdelay;
progress('init', cfg.feedback, 'Computing nonlinear association for each delay');
for i=1:numchannelcmb
dat1_chan = data.trial{trllop}(channelcmb(i,1),:);
dat2_chan = data.trial{trllop}(channelcmb(i,2),:);
progress(l/maxl,'Computing nonlinear association %d from %d', l, maxl);
for j=1:numtimwin
dat1_timwin = dat1_chan(timwin{trllop}(j,1):timwin{trllop}(j,2));
dat2_timwin = dat2_chan(timwin{trllop}(j,1):timwin{trllop}(j,2));
for k=1:numdelay
if delay(k)==0
% select the complete (unshifted) window
dat1_delay = dat1_timwin;
dat2_delay = dat2_timwin;
elseif delay(k)<0
% channel 1 is shifted w.r.t. channel 2
dat1_delay = dat1_timwin((abs(delay(k))+1):end);
dat2_delay = dat2_timwin(1:(end-abs(delay(k))));
elseif delay(k)>0
% channel 2 is shifted w.r.t. channel 1
dat1_delay = dat1_timwin(1:(end-delay(k)));
dat2_delay = dat2_timwin((delay(k)+1):end);
end
% remove the mean of each snippet of data
dat1_delay = dat1_delay - mean(dat1_delay);
dat2_delay = dat2_delay - mean(dat2_delay);
% do the computation
[sorted_data1,id] = sort(dat1_delay);
sorted_data2 = dat2_delay(id);
data_is = sorted_data1;
data_js = sorted_data2;
% divided data_i in bins
[H,mp_i] = hist(data_is,cfg.nr_bins);
% Divide data_i and data_j in bins
bp = 1;
gem_j = zeros (cfg.nr_bins,1);
for s = 1:cfg.nr_bins
gem_j(s) = mean(data_js(bp:bp+H(s)-1));
bp = bp + H(s);
end
% Calculation of line segment and variance per bin
p=1;
sp = 1;
clear unex_var tot_var;
for u = 1:cfg.nr_bins
data_is_bin = data_is(sp:sp+H(u)-1)';
data_js_bin = data_js(sp:sp+H(u)-1)';
if u == 1
fx1 = gem_j(u) + (gem_j(u+1) - gem_j(u))/(mp_i(u+1) - mp_i(u))*(data_is_bin(1:round(length(data_is_bin)/2)) - mp_i(u));
else
fx1 = gem_j(u-1) + (gem_j(u) - gem_j(u-1))/(mp_i(u) - mp_i(u-1))*(data_is_bin(1:round(length(data_is_bin)/2)) - mp_i(u-1));
end
if u == cfg.nr_bins
fx2 = gem_j(u-1) + (gem_j(u) - gem_j(u-1))/(mp_i(u) - mp_i(u-1))*(data_is_bin(round(length(data_is_bin)/2)+1:end) - mp_i(u-1));
else
fx2 = gem_j(u) + (gem_j(u+1) - gem_j(u))/(mp_i(u+1) - mp_i(u))*(data_is_bin(round(length(data_is_bin)/2)+1:end) - mp_i(u));
end
% calculation unexplained variance
ftot = [fx1; fx2];
unex_var(p:p+length(data_js_bin)-1,:) = (data_js_bin - ftot).^2;
% calculation total variance
tot_var(p:p+length(data_js_bin)-1,:) = (data_js_bin - mean(data_js)).^2;
p = p+length(data_is_bin);
sp = sp + H(u);
end
% Calculation of association coefficient h2
h2(i,j,k) = ((sum(tot_var) - sum(unex_var)) / sum(tot_var))*100;
l=l+1;
end
end
end
progress('close');
% collect the results
if (size(h2, 1) ~= numchannelcmb)
error('number of channel combinations is not right');
end
if (size(h2, 2) ~= numtimwin)
error('number of timewindows does not match input');
end
if (size(h2, 3) ~= numdelay)
error('number of delays does not match input');
end
h2 = reshape(h2, numchannelcmb*numtimwin, numdelay);
[m, indx] = max(h2, [], 2);
h2 = reshape(m, numchannelcmb, numtimwin);
d = (delay(indx)./cfg.fsample)*1000; % convert to miliseconds (to compare with original method)
d = reshape(d, numchannelcmb, numtimwin);
% FIXME this does not work: one is not supposed to rely on data.cfg.trl, use data.time please!
% association.time = cfg.toi{trllop} + data.cfg.trl(trllop,1);
association.h2 = h2;
association.delay = d;
return % from Do_Association_Calculation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SUBFUNCTION that computes the mean over all values without checking the dimensions
% this function is approximately 2x faster on 1 dimensional data than the matlab version
function y = mean(x)
y = sum(x(:))./numel(x);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SUBFUNCTION that bins the elements of Y into equally spaced containers
% this function is approximately 2x faster on 1 dimensional data than the matlab version
function [nn, x] = hist(y, x)
miny = min(y);
maxy = max(y);
binwidth = (maxy - miny) ./ x;
xx = miny:binwidth:maxy;
x = xx(1:(end-1)) + binwidth/2;
% Shift bins so the interval is ( ] instead of [ ).
xx = full(real(xx)); y = full(real(y)); % For compatibility
bins = xx + eps(xx);
nn = histc(y',[-inf bins],1);
% Combine first bin with 2nd bin and last bin with next to last bin
nn(2,:) = nn(2,:)+nn(1,:);
nn(end-1,:) = nn(end-1,:)+nn(end,:);
nn = nn(2:end-1,:);