%%%%%%%%%%%%%%%%%%%%%%% file template.tex %%%%%%%%%%%%%%%%%%%%%%%%%
%
% This is a general template file for the LaTeX package SVJour3
% for Springer journals. Springer Heidelberg 2010/09/16
%
% Copy it to a new file with a new name and use it as the basis
% for your article. Delete % signs as needed.
%
% This template includes a few options for different layouts and
% content for various journals. Please consult a previous issue of
% your journal as needed.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% First comes an example EPS file -- just ignore it and
% proceed on the \documentclass line
% your LaTeX will extract the file if required
\begin{filecontents*}{example.eps}
%!PS-Adobe-3.0 EPSF-3.0
%%BoundingBox: 19 19 221 221
%%CreationDate: Mon Sep 29 1997
%%Creator: programmed by hand (JK)
%%EndComments
gsave
newpath
20 20 moveto
20 220 lineto
220 220 lineto
220 20 lineto
closepath
2 setlinewidth
gsave
.4 setgray fill
grestore
stroke
grestore
\end{filecontents*}
%
\RequirePackage{fix-cm}
%
%\documentclass{svjour3} % onecolumn (standard format)
%\documentclass[smallcondensed]{svjour3} % onecolumn (ditto)
%\documentclass[smallextended]{svjour3} % onecolumn (second format)
\documentclass[smallextended,twocolumn]{svjour3} % twocolumn
%
\journalname{Behavior Research Methods}
\smartqed % flush right qed marks, e.g. at end of proof
%
\usepackage{graphicx}
%
%\usepackage{mathptmx} % use Times fonts if available on your TeX system
%
%\usepackage[natbibapa]{apacite}
\usepackage{natbib}
%\usepackage{tabularx}
\usepackage{amsmath}
\usepackage{textcomp}
\usepackage{booktabs}
\usepackage{units}
\usepackage[draft]{hyperref} %tmp added draft option for spilling refs
\usepackage{wrapfig}
\usepackage{todonotes}
\newcommand{\eg}{e.g., }
\newcommand{\ie}{i.e., }
\newcommand{\remodnav}{REMoDNaV}
\newcommand{\fig}[1]{{Figure~\ref{fig:#1}}}
\newcommand{\tab}[1]{{Table~\ref{tab:#1}}}
\newcommand{\param}[1]{{\texttt{#1}}}
\begin{document}
\input{results_def.tex}
\onecolumn
\title{REMoDNaV: Robust Eye-Movement Classification for Dynamic Stimulation} %\\ (remodnav)
% \titlenote{The title should be detailed enough for someone to know whether
% the article would be of interest to them, but also concise. Please ensure the
% broadness and claims within the title are appropriate to the content of the
% article itself.}
\author{%
Asim~H.~Dar\textsuperscript{*} \and
Adina~S.~Wagner\textsuperscript{*} \and
Michael~Hanke\\
{\small \textsuperscript{*} Both authors have contributed equally}}
\institute{Asim~H.~Dar \at
Special Lab Non-Invasive Brain Imaging, Leibniz Institute for Neurobiology, Brenneckestra{\ss}e~6, Magde\-burg, Germany
%Tel.: +123-45-678910\\
%Fax: +123-45-678910\\
%\email{fauthor@example.com} % \\
% \emph{Present address:} of F. Author % if needed
\and
Adina Wagner \at
Psychoinformatics lab, Institute of Neuroscience and Medicine (INM-7: Brain and Behaviour), Research Centre Jülich, Germany
\and
Michael Hanke \at
Psychoinformatics lab, Institute of Neuroscience and Medicine (INM-7: Brain and Behaviour), Research Centre Jülich and Institute of Systems Neuroscience, Medical Faculty,
Heinrich Heine University Düsseldorf, Germany
\email{michael.hanke@gmail.com}
}
%\affil[1]{Psychoinformatics Lab, Institute of Psychology, Otto-von-Guericke University, Magdeburg, Germany}
\date{Received: date / Accepted: date}
\maketitle
% Please list all authors that played a significant role in the research
% involved in the article. Please provide full affiliation information
% (including full institutional address, ZIP code and e-mail address) for all
% authors, and identify who is/are the corresponding author(s).
\begin{abstract}
% Abstracts should be up to 300 words and provide a succinct summary of the
% article. Although the abstract should explain why the article might be
% interesting, care should be taken not to inappropriately over-emphasize the
% importance of the work described in the article. Citations should not be used
% in the abstract, and the use of abbreviations should be minimized. If you are
% writing a Research or Systematic Review article, please structure your
% abstract into Background, Methods, Results, and Conclusions.
Tracking of eye movements is an established measurement for many types of
experimental paradigms.
More complex and more prolonged visual stimuli have made algorithmic approaches to
eye movement event classification the most pragmatic option.
A recent analysis revealed that many current algorithms perform poorly on
data from viewing dynamic stimuli such as video sequences.
Here we present an event classification algorithm---built on an existing
velocity-based approach---that is suitable for both static and dynamic
stimulation, and is capable of classifying saccades, post-saccadic
oscillations, fixations, and smooth pursuit events.
We validated classification performance and robustness on three public datasets:
1)~manually annotated, trial-based gaze trajectories for viewing static images,
moving dots, and short video sequences, 2)~lab-quality gaze recordings for a
feature length movie, and 3)~gaze recordings acquired under suboptimal lighting
conditions inside the bore of a magnetic resonance imaging (MRI) scanner for
the same full-length movie.
We found that the proposed algorithm performs on par with or better than
state-of-the-art alternatives for static stimulation. Moreover, it yields
eye movement events with biologically plausible characteristics on prolonged
dynamic recordings. Lastly, algorithm performance is robust
on data acquired under suboptimal conditions that exhibit a temporally
varying noise level.
These results indicate that the proposed algorithm is a robust tool with
improved classification accuracy across a range of use cases.
The algorithm is cross-platform compatible, implemented using the
Python programming language, and readily available as free and open source software
from public sources.
\keywords{%
eye tracking \and
adaptive classification algorithm \and
saccade classification algorithm \and
statistical saccade analysis \and
glissade classification \and
adaptive threshold algorithm \and
data preprocessing
}
\end{abstract}
% \todo[inline]{The scope of the article is a "Data Note" that describes new
% "derived" data generated from the raw eyetracking data released by the
% studyforrest project. I propose to produce two types of artifacts: 1.
% filtered/preprocessed eyetracking data, and 2. a list with detected saccades
% for each recording.}
% \todo[inline]{It would be good to also release fully preprocessed data. Apart
% from applying the chosen filter, it would also make sense to me to temporally
% down-sample the data. What would be a practical sampling rate that reduces
% the data size (and some noise), but does not negatively impact most potential
% analyses? 100 Hz? 200 Hz? Even in the latter case it would still be a 5x
% reduction in size.}
% \todo[inline, backgroundcolor = green]{250 Hz is the lower limit to detect
% most saccades (Kern 2000). However,the downsampled data did not reach the
% same accuracy level as the higher frequency data, probably because the
% algorithm was designed to work on higher frequencies (as stated by the
% authors). Therefore we left it at the 1000 Hz frequency}
\twocolumn
\section*{Introduction}\label{intro}
% \todo[inline]{\textit{make connection to studyforrest. studyforrest has
% eyetracking data. why is it necessary to have that preprocessed?}}
% The data used for this thesis originates from the open science project
% 'studyforrest'. It centers around two large data acquisition phases employing
% the movie 'Forrest Gump' as stimulus. \cite{Hanke.2014,Hanke.2016} The
% project provides a large variety of collections of data to enable fellow
% researchers to build upon existing knowledge and further extend the dataset.
% Along other measures, the eye gaze coordinates were being recorded during the
% original sessions. Contrarily to the standard automatic detection process we
% applied an adaptive algorithm to the eye movement data to provide a more
% precise computation of saccades and fixations.
A growing theme in cognitive neuroscience is the use of dynamic and naturalistic
stimuli, such as video clips or movies, as opposed to static and isolated
stimuli \citep{real_world}. Dynamic stimuli promise to reveal
the nuances of cognition in a more life-like environment \citep{maguire2012studying}.
Interesting applications include determining the neural
response to changes in facial expression \citep{Harris2014}, understanding
complex social interactions through videos \citep{Tikka2012}, and less
explored topics such as the processing of music
\citep{Toiviainen2014}. In such studies, an unobtrusive behavioral measurement
is required to quantify the relationship between stimulus and response.
Tracking the focus of participants' gaze is a suitable, well-established
method that has been employed successfully in studies of visual attention
\citep{HantaoLiu2011}, memory \citep{Hannula2010}, and language comprehension
\citep{Gordon2006}.
%
Regardless of use case, the raw eye tracking data (gaze position coordinates)
provided by eye tracking devices are rarely used ``as is''. Instead, in order
to disentangle different cognitive, oculomotor, or perceptive states
associated with different types of eye movements, most research relies on the
classification of eye gaze data into distinct eye movement event categories
\citep{Schutz2011}. The most feasible approach for doing this lies in the
application of appropriate event classification algorithms.
However, a recent comparison of algorithms found that while many readily
available algorithms for eye movement classification performed well on data
from static stimulation or short trial-based acquisitions with simplified
moving stimuli, none worked particularly well on data from complex
dynamic stimulation, such as video clips, when compared to human coders
\citep{Andersson2017}.
%
And indeed, when we evaluated an algorithm by \citet{Nystrom2010AnData}, one of
the winners in the aforementioned comparison, on data from prolonged
stimulation (\unit[$\approx$15]{min}) with a feature film, we found the
average and median durations of labeled fixations to exceed literature
reports \citep[\eg][]{holmqvist2011eye,dorr2010variability} by up to a factor
of two. Additionally, and in particular for increasing levels of noise in the
data, the algorithm classified too few fixations, as also noted by
\citet{Friedman2018}, because it discarded potential fixation events that
contained data artifacts such as signal-loss and distortion associated with
blinks.
%
%However, robust performance on noisy data is of particular relevance in the
%context of ``natural stimulation'', as the ultimate natural stimulation is the
%actual natural environment, and data acquired outdoors or with mobile
%devices typically does not match the quality achieved in dedicated lab
%setups.
Therefore our objective was to improve upon the available eye movement
classification algorithms, and develop a tool that performs
robustly on data from dynamic, feature-rich stimulation, without sacrificing classification
accuracy for static and simplified stimulation. Importantly, we aimed for
applicability to prolonged recordings that potentially exhibit periods of
signal-loss and non-stationary noise levels.
Finally, one of our main objectives was to keep the algorithm as accessible
and readily available as possible, in order to avoid the difficulties associated
with closed-source software or the non-public source code of published
algorithms.
% maybe work in \citep{Hooge2018} again
Following the best practices proposed by \citet{hessels2018eye},
we define the different eye movements supported by our algorithm
along functional and oculomotor dimensions as follows:
A \textit{fixation} is a period of time during which a part of the visual stimulus
is looked at and thereby projected to a relatively constant location on the retina.
This type of eye movement is necessary for visual intake, and characterized by a
relatively still gaze position with respect to the world (e.g., a computer screen
used for stimulus presentation) in the eye-tracker signal.
A fixation event therefore excludes periods of \textit{smooth pursuit}.
These events are eye movements during which a
part of the visual stimulus that moves with respect to the world is looked at for
visual intake (e.g., a moving dot on a computer screen). As during fixations,
the stimulus is projected to a relatively constant location on the retina
\citep{carl1987pursuits}; however, the event is characterized by a steadily changing
gaze position in the eye-tracker signal.
If this type of eye movement is not properly classified,
erroneous fixation and saccade events (which smooth pursuits would be classified into
instead) are introduced \citep{Andersson2017}. Contemporary algorithms rarely provide
this functionality \citep[but see \eg][for existing algorithms with
smooth pursuit classification]{LARSSON2015145,Komogortsev2013}.
\textit{Saccades}, on the other hand, are also characterized by changing gaze positions,
but their velocities are usually higher than those of pursuit movements.
They serve to shift the position of the eye to a target region, and, unlike
during pursuit or fixation events, visual intake
is suppressed \citep{Schutz2011}. Lastly, \textit{post-saccadic oscillations} are periods of
ocular instability after a saccade \citep{Nystrom2010AnData}.
Here we introduce \remodnav\ (robust eye movement classification for dynamic
stimulation), a new tool that aims to meet our objectives and classifies the
eye movement events defined above. It is built on the
aforementioned algorithm by \citet{Nystrom2010AnData} (subsequently labeled NH)
that employs an adaptive approach to velocity based eye movement event
classification. \remodnav\ enhances NH with the use of robust
statistics, and a compartmentalization of prolonged time series into short,
more homogeneous segments with more uniform noise levels.
Furthermore, it adds support for pursuit event classification.
Like the original algorithm, its frame of reference is world-centered,
\ie the gaze coordinates refer to a stimulation set-up with a fixed
position in the world, such as x and y coordinates in pixels on a computer screen,
and it is meant to be used with eye tracking data from participants viewing
static (\eg images) or dynamic (\eg videos) stimuli, recorded with remote or
tower-mounted eye trackers.
Importantly, it is built and distributed as free, open source software,
and can be easily obtained and executed with free tools.
We evaluated \remodnav\ on three different
datasets from conventional paradigms and from dynamic, feature-rich stimulation (high and lower
quality), and relate its performance to the algorithm comparison by
\cite{Andersson2017}.
\section*{Methods}\label{methods}
% Methods (3 sections; our algo, comparison to current algos, application on
% studyforrest dataset)
%\ todo[inline]{\textit{Elaborate on how the algorithm works;For software tool
%papers, this section should address how the tool works and any relevant
%technical details required for implementation of the tool by other
%developers.}}
Event classification algorithms can be broadly grouped into \textit{velocity-} and
\textit{dispersion-}based algorithms. The former rely on velocity thresholds to
differentiate between different eye movement events, while the latter classify
eye movements based on the size of the region the recorded data falls
into for a given amount of time \citep{holmqvist2011eye}. Both types of algorithms
are common (see e.g., \citet{hessels2017noise} for a recent dispersion-based,
and e.g., \citet{van2018gazepath} for a recent velocity-based solution,
and see \citet{dalveren2019evaluation} for an evaluation of common algorithms
of both types).
Like NH, \remodnav\ is a \textit{velocity-based} event classification algorithm.
The algorithm comprises two major steps: preprocessing and event classification. The following
sections detail the individual analysis steps. For each step, the relevant algorithm
parameters are given in parentheses.
\fig{alg} provides an overview of the algorithm's main components.
\tab{parameters} summarizes all parameters, and lists their default values.
The computational definitions of the different eye movements
\citep{hessels2018eye} are given within the event classification description.
Note, however, that some of the computational definitions of eye movements can be
adjusted to comply with alternative definitions by changing the algorithm's
parameters.
\subsection*{Preprocessing}
The goal of data preprocessing is to compute a time series of eye movement
velocities on which the event classification algorithm can be executed, while jointly
reducing non-eyemovement-related noise in the data as much as possible.
First, implausible spikes in the coordinate time series are removed with a
heuristic spike filter \citep{stampe1993} (\fig{alg}, P1). This filter is
standard in many eye tracking toolboxes and often used for preprocessing
\citep[\eg][]{Friedman2018}.
%
Data samples around signal loss (\eg eye blinks) can be set to non-numeric values (NaN)
in order to eliminate spurious movement signals without shortening the time series
(\param{dilate\_nan}, \param{min\_blink\_duration}; \fig{alg}, P2). This is
motivated by the fact that blinks can produce artifacts in the eye-tracking signal when the
eyelid closes and re-opens \citep{choe2016pupil}.
%
Coordinate time series are temporally filtered in two different ways
(\fig{alg}, P3). A relatively large median filter
(\param{median\_filter\_length}) is used to emphasize large amplitude saccades. This type of
filtered data is later used for a coarse segmentation of a time series into
shorter intervals between major saccades.
%
Separately, data are also smoothed with a Savitzky-Golay filter
(\param{savgol\_ \{length,polyord\}}). All event classification beyond the
localization of major saccades for time series chunking is performed on this
type of filtered data.
After spike-removal and temporal filtering, movement velocities are computed.
To disregard biologically implausible measurements, a
configurable maximum velocity (\param{max\_vel}) is enforced---any samples
exceeding this threshold are replaced by this set value.
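For illustration, a minimal sketch of this smoothing and velocity computation is
given below, using standard NumPy/SciPy tools and the parameter names from
\tab{parameters}. The function name and exact filter handling are illustrative
and simplified relative to the actual \remodnav\ implementation.
\begin{verbatim}
# Simplified sketch, not the REMoDNaV source:
# smoothing and velocity computation.
import numpy as np
from scipy.signal import savgol_filter

def compute_velocities(x, y, px2deg,
                       sampling_rate,
                       savgol_length=0.019,
                       savgol_polyord=2,
                       max_vel=1000.0):
    # Savitzky-Golay window in samples (odd)
    win = int(savgol_length * sampling_rate) | 1
    xs = savgol_filter(x, win, savgol_polyord)
    ys = savgol_filter(y, win, savgol_polyord)
    # sample-to-sample velocity in deg/s
    vel = np.hypot(np.diff(xs), np.diff(ys)) \
        * px2deg * sampling_rate
    # clamp implausible velocities at max_vel
    return np.minimum(vel, max_vel)
\end{verbatim}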
%The result of a default preprocessing procedure is displayed in \fig{preproc}.
%
%\begin{figure}
% \includegraphics[width=0.5\textwidth]{img/preproc.pdf}
% \caption{Examplary preprocessing.}
% \label{fig:preproc}
%\end{figure}
\begin{figure*}
\includegraphics[width=1\textwidth]{img/flowchart_2.pdf}
\caption{Schematic algorithm overview.
(A) Preprocessing. The two plots show raw (blue) and processed (black)
time series after preprocessing with the default parameter values
(see Table \ref{tab:parameters} for details).
(B) Adaptive saccade velocity computation and time series chunking.
Starting from an initial velocity threshold (\param{velthresh\_startvelocity}),
a global velocity threshold is iteratively determined. The time series is chunked
into intervals between the fastest saccades across the complete recording.
(C) Saccade and PSO classification.
Saccade on- and offsets, and PSO on- and offsets are classified based on adaptive
velocity thresholds computed within the respective event contexts.
The default context is either \unit[1]{s} centered on the peak velocity for saccadic
events used for time series chunking, or the entire time series chunk for
intersaccadic intervals. PSOs are classified into low- or high-velocity PSOs
depending on whether they exceed the saccade onset- or peak-velocity threshold.
(D) Fixation and pursuit classification.
Remaining unlabeled segments are filtered with a low-pass Butterworth filter. Samples
exceeding a configurable pursuit velocity threshold (\param{pursuit\_velthresh})
are classified as pursuits, and segments that do not qualify as pursuits are
classified as fixations.
}
\label{fig:alg}
\end{figure*}
\begin{table*}[tbp]
\caption{Exhaustive list of algorithm parameters, their default values, and units.}
\label{tab:parameters}
\small
\begin{tabular}{lp{85mm}l}
\textbf{Name} & \textbf{Description} & \textbf{Value} \\
& & \\
\multicolumn{3}{l}{\textit{Preprocessing (in order of application during processing)}} \\
\texttt{px2deg} &
size of a single (square) pixel &
no default [\unit{deg}]\\
\texttt{sampling\_rate} &
temporal data sampling rate/frequency &
no default [\unit{Hz}]\\
\texttt{min\_blink\_duration} &
missing data windows shorter than this duration will not be considered for \texttt{dilate\_nan}&
\unit[0.02]{s}\\
\texttt{dilate\_nan} &
duration for which to replace data by missing data markers on either side of a
signal-loss window (\fig{alg}, P2)&
\unit[0.01]{s}\\
\texttt{median\_filter\_length} &
smoothing median-filter size (for initial data chunking only) (\fig{alg}, P3)&
\unit[0.05]{s}\\
\texttt{savgol\_length} &
size of Savitzky-Golay filter for noise reduction (\fig{alg}, P3)&
\unit[0.019]{s}\\
\texttt{savgol\_polyord} &
polynomial order of Savitzky-Golay filter for noise reduction (\fig{alg}, P3)&
2\\
\texttt{max\_vel} &
maximum velocity threshold, will replace value with maximum, and issue
warning if exceeded to inform about
potentially inappropriate filter settings
\citep[default value based on ][]{holmqvist2011eye}&
\unit[1000]{deg/s}\\
\\\multicolumn{3}{l}{\textit{Event classification}} \\
\texttt{min\_saccade\_duration} &
minimum duration of a saccade event candidate (\fig{alg}, E3) &
\unit[0.01]{s}\\
\texttt{max\_pso\_duration} &
maximum duration of a post-saccadic oscillation (glissade) (\fig{alg}, E3) &
\unit[0.04]{s}\\
\texttt{min\_fixation\_duration} &
minimum duration of a fixation event candidate (\fig{alg}, E4)&
\unit[0.04]{s}\\
\texttt{min\_pursuit\_duration} &
minimum duration of a pursuit event candidate (\fig{alg}, E4)&
\unit[0.04]{s}\\
\texttt{min\_intersaccade\_duration} &
no saccade classification is performed in windows shorter than twice this value, plus minimum saccade and PSO duration (\fig{alg}, E2)&
\unit[0.04]{s}\\
\texttt{noise\_factor} &
adaptive saccade onset threshold velocity is the median absolute deviation of velocities in the window of interest, times this factor (peak velocity threshold is twice the onset velocity); increase for noisy data to reduce false positives \citep[equivalent: 3.0]{Nystrom2010AnData} (\fig{alg}, E1)&
5\\
\texttt{velthresh\_startvelocity} &
start value for adaptive velocity threshold algorithm \citep{Nystrom2010AnData}, should
be larger than any conceivable minimum saccade velocity (\fig{alg}, E1)&
\unit[300]{deg/s}\\
\texttt{max\_initial\_saccade\_freq} &
maximum saccade frequency for initial classification of major saccades, initial data
chunking is stopped if this frequency is reached (should be smaller than an expected
(natural) saccade frequency in a particular context), default based on literature reports of a natural, free-viewing saccade frequency of \unit[$\sim$1.7 $\pm$0.3]{Hz} during a movie stimulus \citep{amit2017temporal} (\fig{alg}E1)&
\unit[2]{Hz}\\
\texttt{saccade\_context\_window\_length} &
size of a window centered on any velocity peak for adaptive determination of
saccade velocity thresholds (for initial data chunking only) (\fig{alg}, E2)&
\unit[1]{s}\\
\texttt{lowpass\_cutoff\_freq} &
cut-off frequency of a Butterworth low-pass filter applied to determine drift
velocities in a pursuit event candidate (\fig{alg}, E4)&
\unit[4]{Hz}\\
\texttt{pursuit\_velthresh} &
fixed drift velocity threshold to distinguish periods of pursuit from periods of fixation; higher than natural ocular drift velocities during fixations \citep[\eg ][]{GOLTZ1997789,cherici2012} (\fig{alg}, E4)&
\unit[2]{deg/s}\\
\end{tabular}
\end{table*}
\subsection*{Event classification}
\subsubsection*{Saccade velocity threshold}
Except for a few modifications, \remodnav\ employs the adaptive saccade
classification algorithm proposed by \cite{Nystrom2010AnData}, where saccades are
initially located by thresholding the velocity time series by a critical value.
Starting from an initial velocity threshold (\param{velthresh\_startvelocity},
termed $PT_1$ in NH), the critical value is determined adaptively by computing
the variance of sub-threshold velocities ($V$), and placing the new velocity
threshold at:
%
\begin{equation}
PT_n = \overline{V}_{n-1} + F \times
\sqrt{\frac{\sum(V_{n-1} - \overline{V}_{n-1})^2}{N-1}}
\end{equation}
%
where $F$ determines how many standard deviations above the average velocity
the new threshold is located. This procedure is repeated until the threshold
velocity stabilizes, \ie until
%
\begin{equation} |PT_n - PT_{n-1}| < 1^\circ/s. \end{equation}
\remodnav\ alters this algorithm by using robust statistics that are more
suitable for the non-normal distribution of velocities \citep{Friedman2018},
such that the new threshold is computed by:
%
\begin{equation}\label{eq:threshold}
PT_n = median({V}_{n-1}) + F \times MAD({V}_{n-1})
\end{equation}
%
where $MAD$ is the median absolute deviation, and $F$ is a
scalar parameter of the algorithm.
This iterative process is illustrated in \fig{alg}, E1 (upper panel).
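As an illustration, the sketch below implements the iteration of equation
\ref{eq:threshold} with plain NumPy. The function name and the termination
tolerance argument are illustrative, and the actual \remodnav\ implementation
may differ in detail.
\begin{verbatim}
# Simplified sketch of the adaptive velocity
# threshold iteration (equation 3).
import numpy as np

def adaptive_threshold(vel, start=300.0,
                       noise_factor=5.0,
                       tol=1.0):
    pt = start
    while True:
        sub = vel[vel < pt]
        med = np.median(sub)
        mad = np.median(np.abs(sub - med))
        new_pt = med + noise_factor * mad
        if abs(new_pt - pt) < tol:
            return new_pt
        pt = new_pt
\end{verbatim}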
% Adina: removed in favor of new algorithm overview
%\begin{figure}
% \includegraphics[width=0.5\textwidth]{img/vel_est_1.pdf}
% \caption{Iterative, global estimation of velocity thresholds
% for saccades (SACC), and high/low velocity post saccadic oscillations (HPSO/LPSO).
% The method is adapted from \cite{Nystrom2010AnData}, but is modified to use robust statistics
% with median absolute deviation (MAD) as a measure of variability, more suitable
% for data with a non-normal distribution.}
% \label{fig:velest1}
%\end{figure}
\subsection*{Time series chunking}
As the algorithm aims to be applicable to prolonged recordings with
potentially inhomogeneous noise levels, the time series needs
to be split into shorter chunks to prevent the negative impact of sporadic
noise flares on the aforementioned adaptive velocity thresholding procedure.
\remodnav\ implements this time-series chunking by determining a critical velocity on a
median-filtered (\param{median\_filter\_length}) time series comprising the
full duration of a recording (\fig{alg}, E2). Due to potentially elevated noise
levels, the resulting threshold tends to overestimate an optimal threshold.
Consequently, only periods of fastest eye movements will exceed this threshold.
All such periods of consecutive above-threshold velocities are weighted by the
sum of these velocities. Boundaries of time series chunks are determined by
selecting such events sequentially (starting with the largest sums), until a
maximum average frequency across the whole time series is reached
(\param{max\_initial\_saccade\_ freq}). The resulting chunks represent data
intervals between saccades of maximum magnitude in the respective data.
\fig{alg}, E3 (right) exemplifies event classification within such an intersaccadic interval.
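The chunking logic can be sketched as follows; variable and function names are
illustrative, and the actual implementation differs in detail.
\begin{verbatim}
# Simplified sketch of time series chunking.
import numpy as np

def major_saccade_periods(vel, threshold,
                          sampling_rate,
                          max_saccade_freq=2.0):
    # runs of consecutive above-threshold samples
    # (zero-padding handles series boundaries)
    above = np.r_[0, (vel > threshold).astype(int), 0]
    edges = np.diff(above)
    starts = np.where(edges == 1)[0]
    ends = np.where(edges == -1)[0]
    runs = list(zip(starts, ends))
    # weight each run by the sum of its velocities
    weights = [vel[s:e].sum() for s, e in runs]
    # keep the largest runs until the maximum
    # average saccade frequency is reached
    n_max = int(max_saccade_freq
                * len(vel) / sampling_rate)
    keep = np.argsort(weights)[::-1][:n_max]
    return sorted(runs[i] for i in keep)
\end{verbatim}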
\subsection*{Classification of saccades and post-saccadic oscillations}
Classification of these event types is identical to the NH algorithm, only the data
context and metrics for determining the velocity thresholds differ. For
saccades that also represent time series chunk boundaries (event label
\texttt{SACC}), a context of \unit[1]{s}
(\param{saccade\_context\_window\_ length}) centered on the peak velocity is
used by default, for any other saccade (event label \texttt{ISAC}) the entire
time series chunk represents that context (\fig{alg}, E3).
Peak velocity threshold and on/offset velocity threshold are then determined by
equation \ref{eq:threshold} with $F$ set to $2\times\mathtt{noise\_factor}$ and
\param{noise\_factor}, respectively. Starting from a velocity peak, the
immediately preceding and the following velocity minima that do not exceed the
on/offset threshold are located and used as event boundaries. Qualifying events
are rejected if they do not exceed a configurable minimum duration or violate
the set saccade maximum proximity criterion (\param{min\_ saccade\_duration},
\param{min\_intersaccade\_duration}).
As in NH, post-saccadic oscillations are events that immediately follow a
saccade, where the velocity exceeds the saccade velocity threshold within a short
time window (\param{max\_pso\_duration}). \remodnav\ distinguishes low-velocity
(event label \texttt{LPSO} for chunk boundary event, \texttt{ILPS} otherwise)
and high-velocity oscillations (event label \texttt{HPSO} or \texttt{IHPS}),
where the velocity exceeds the saccade onset or peak velocity threshold,
respectively.
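A sketch of the on- and offset search around a velocity peak is shown below; it
is a simplified illustration with assumed function names, not the exact
\remodnav\ routine.
\begin{verbatim}
# Simplified sketch: locate saccade on-/offset
# as the closest sub-threshold velocity minima
# around a velocity peak.
def saccade_bounds(vel, peak, thresh):
    i = peak
    while i > 0 and not (vel[i] < thresh
                         and vel[i] <= vel[i - 1]):
        i -= 1
    j = peak
    while j < len(vel) - 1 and not (
            vel[j] < thresh
            and vel[j] <= vel[j + 1]):
        j += 1
    return i, j
\end{verbatim}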
% Adina: Removed in favor of new algorithm overview
%\begin{figure}
% \includegraphics[width=0.5\textwidth]{img/vel_est_2.pdf}
% \caption{Iterative event classification between major saccades (SACC).
% The algorithm reports saccades within major saccade windows (ISAC),
% high/low velocity post saccadic oscillations after ISAC events (IHPS/ILPS),
% fixations (FIXA), and smooth pursuits (PURS).}
% \label{fig:velest2}
%\end{figure}
\subsection*{Pursuit and fixation classification}
For all remaining, unlabeled time series segments that are longer than a
minimum duration (\param{min\_fixation\_ duration}), velocities are low-pass
filtered (Butterworth, \param{lowpass\_cutoff\_freq}). Any segments
exceeding a minimum velocity threshold (\param{pursuit\_velthresh}) are
classified as pursuit (event label \texttt{PURS}). Pursuit on/offset classification
uses the same approach as that for saccades: search for local minima preceding
and following the above-threshold velocities.
%
Any remaining segment that does not qualify as a pursuit event is classified
as a fixation (event label \texttt{FIXA}) (\fig{alg}, E4).
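The following sketch illustrates this decision for a single unlabeled segment;
the Butterworth filter order is an assumption made for this illustration and is
not taken from the \remodnav\ source.
\begin{verbatim}
# Simplified sketch: label one unlabeled segment
# as pursuit or fixation based on low-pass
# filtered (drift) velocities.
from scipy.signal import butter, filtfilt

def label_segment(vel, sampling_rate,
                  lowpass_cutoff_freq=4.0,
                  pursuit_velthresh=2.0):
    # filter order of 2 is an assumption
    b, a = butter(2, lowpass_cutoff_freq,
                  fs=sampling_rate)
    drift = filtfilt(b, a, vel)
    if (drift > pursuit_velthresh).any():
        return 'PURS'
    return 'FIXA'
\end{verbatim}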
\subsection*{Operation}\label{op}
\remodnav\ is free and open-source software, written in the Python language and
released under the terms of the MIT license. In addition to the Python standard
library it requires the Python packages
%
NumPy \citep{oliphant2006guide},
Matplotlib \citep{hunter2007matplotlib},
statsmodels \citep{seabold2010statsmodels},
and SciPy \citep{JOP+2001} as software dependencies.
Furthermore, DataLad \citep{HH+2013},
and Pandas \citep{mckinney2010data}
%
have to be available to run the test
battery. \remodnav\ itself, and all software dependencies are available on all
major operating systems. There are no particular hardware requirements for
running the software other than sufficient memory to load and process the data.
A typical program invocation looks like
%
\begin{verbatim}
remodnav <inputfile> <outputfile> \
<px2deg> <samplingrate>
\end{verbatim}
%
where \texttt{<inputfile>} is the name of a tab-separated-value (TSV) text file
with one gaze coordinate sample per line. An input file can have any number of
columns, only the first two columns are read and interpreted as $X$ and $Y$
coordinates. Note that this constrains input data to a dense data representation,
i.e. either data from eye trackers with fixed sampling frequency throughout the
recording, or sparse data that has been transformed into a dense representation
beforehand.
The second argument \texttt{<outputfile>} is the file name of a
BIDS-compliant \citep{gorgolewski2016brain} TSV text file that will contain a
report on one classified eye movement event per line, with onset and offset time,
onset and offset coordinates, amplitude, peak velocity, median velocity and
average velocity. The remaining arguments are the only two mandatory
parameters: the conversion factor from pixels to visual degrees, \ie the visual
angle of a single (square) pixel (\texttt{<px2deg>} in \unit{deg}), and the
temporal sampling rate (\texttt{<sampling\_rate>} in \unit{Hz}).
Any other supported parameter can be added to the program invocation to override
the default values.
A complete list of supported parameters (sorted by algorithm step), with their
descriptions and default values, is given in \tab{parameters}.
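For example, a hypothetical invocation that overrides two defaults could look
as follows; the option names and values shown here are purely illustrative, and
the exact spelling should be taken from the program's help output.
\begin{verbatim}
remodnav <inputfile> <outputfile> \
    <px2deg> <samplingrate> \
    --min-fixation-duration 0.04 \
    --pursuit-velthresh 5.0
\end{verbatim}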
While the required user input is kept minimal, the number of configurable
parameters is purposefully large to facilitate optimal parameterization for
data with specific properties. Besides the list of classified events, a
visualization of the classification results, together with the time courses of
horizontal and vertical gaze position and velocity, is provided for
illustration and initial quality assessment of algorithm performance on each
input data file.
\section*{Validation analyses}\label{ana}
% \todo[inline]{three major types of comparison: with andersson human labeling,
% stats of forrest lab recording with andersson video data stats, forrest lab
% vs forrest mri stats. the goal is to show that we are similar to humans, as
% good (or better) as other algorithms (by comparison with scores in
% andersson2017), and proceduce "similar" results on a different movie dataset,
% and similar results across two different qualities of recordings with the
% same stimulus (lab vs MRI). No more, no less IMHO. This all translates to
% three use cases: trial-by-trial data (from anderson), good movie data without
% trial structure (forrest lab), bad movie data (forrest mri)}
% THIS SECTION WILL BASICALLY SHOW THE INPUTS AND THE OUTPUTS(RESULTS
% BASICALLY)
The selection of datasets and analyses for validating algorithm performance was
guided by three objectives: 1) compare to other existing
solutions; 2) demonstrate plausible results on data from prolonged gaze
coordinate recordings during viewing of dynamic, feature-rich stimuli; and 3) illustrate result
robustness on lower-quality data. The following three sections each introduce a
dataset and present the validation results for these objectives. All analysis
presented here are performed using default parameters (\tab{parameters}), with
no dataset-specific tuning other than the built-in adaptive behavior.
\subsection*{Algorithm comparison}\label{ana_1}
Presently, \cite{Andersson2017} represents the most comprehensive comparative
study on eye movement classification algorithms. Moreover, the dataset employed
in that study was made publicly available. Consequently, evaluating \remodnav\
performance on these data and using their metrics offers a straightforward
approach to relate this new development to alternative solutions.
% dataset
The dataset provided by
\cite{Andersson2017}\footnote{github.com/richardandersson/EyeMovementDetector\linebreak[0]Evaluation}
consists of monocular eye gaze data produced from viewing stimuli from three
distinct categories---images, moving dots and videos. The data release contains
gaze coordinate time series (\unit[500]{Hz} sampling rate), and metadata on
stimulus size and viewing distance. Importantly, each time point was manually
classified by two expert human raters as one of six event categories: fixation,
saccade, PSO, smooth pursuit, blink and undefined (a sample that did not fit
any other category). A minor labeling mistake reported in \cite{Zemblys2018}
was fixed prior to this validation analysis.
For each stimulus category, we computed the proportion of misclassifications
per event type, comparing \remodnav\ to each of the human coders, and, as a
baseline measure, the human coders against each other.
%
A time point was counted as misclassified if the two compared classifications
did not assign the same label. We limited this analysis to all time points that
were labeled as fixation, saccade, PSO, or pursuit by any method, hence
ignoring the rarely used NaN/blinks or ``undefined" category. For a direct
comparison with the results in \cite{Andersson2017}, the analysis was repeated
while also excluding samples labeled as pursuit.
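Operationally, this amounts to a simple sample-wise comparison, as sketched
below with illustrative label names.
\begin{verbatim}
# Simplified sketch of the misclassification
# rate for two label sequences
# (illustrative label names).
import numpy as np

def misclassification(a, b,
                      events=('FIX', 'SAC',
                              'PSO', 'SP')):
    a, b = np.asarray(a), np.asarray(b)
    # keep samples labeled with any of the event
    # categories by either classification
    mask = np.isin(a, events) | np.isin(b, events)
    return np.mean(a[mask] != b[mask])
\end{verbatim}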
In the labeled data, there was no distinction made between high- and low-velocity
PSOs, potentially because the literature following \citet{Nystrom2010AnData}
did not adopt their differentiation of PSOs into velocity categories.
All high- and low-velocity PSOs classified by \remodnav\ were therefore
collapsed into a single PSO category. \tab{mclf} shows the
misclassification rates for all pairwise comparisons, in all stimulus types.
In comparison to the NH algorithm, on which the proposed work is modelled,
\remodnav\ performed consistently better (32/93/70\% average misclassification for NH,
vs. \imgMNALMclfWOP/\dotsRAALMclfWOP/\videoRAALMclfWOP\% worst
misclassification for \remodnav\ in the images, dots, and videos categories). Compared to all ten
algorithms evaluated in \citet{Andersson2017}, \remodnav\ exhibits the lowest
misclassification rates across all stimulus categories.
%
When taking smooth pursuit events into account, the misclassification rate
naturally increases, but remains comparably low. Importantly, \remodnav\ still exceeds the
performance of all algorithms tested in \citet{Andersson2017} in the dots
and videos categories, and performs among the best in the images category.
Additionally, both with and without smooth pursuit, \remodnav's performance
also exceeds that of a recent deep neural network trained specifically on
video clips \citep[compare Table 7: 34\% misclassification versus \videoMNALMCLF\%
for \remodnav]{Startsev2018}.
\begin{table}[tbp]
% table caption is above the table
\caption{Proportion of samples in each stimulus category classified in
disagreement between human coders (MN, RA) and the \remodnav\ algorithm
(AL). The MC (misclassification) column lists proportions considering
all four event categories (fixation, saccade, PSO, pursuit), while
the w/oP (without pursuit) column excludes pursuit events for a direct
comparison with \citet[][Tables 8-10]{Andersson2017}.
The remaining columns show the percentage of labels assigned to incongruent
time points by each rater (deviation of their sum from 100\% is due to
rounding).
}
\label{tab:mclf} % Give a unique label
% For LaTeX tables use
\begin{tabular}{llllllll}
\textbf{Images}&&&&&&&\\
\hline\noalign{\smallskip}
Comp & MC & w/oP & Coder & Fix & Sac & PSO & SP \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN-RA & \imgMNRAMCLF & \imgMNRAMclfWOP & MN & \imgMNRAFIXref & \imgMNRASACref & \imgMNRAPSOref & \imgMNRASPref \\
--- & --- & --- & RA & \imgMNRAFIXcod & \imgMNRASACcod & \imgMNRAPSOcod & \imgMNRASPcod \\
MN-AL & \imgMNALMCLF & \imgMNALMclfWOP & MN & \imgMNALFIXref & \imgMNALSACref & \imgMNALPSOref & \imgMNALSPref \\
--- & --- & --- & AL & \imgMNALFIXcod & \imgMNALSACcod & \imgMNALPSOcod & \imgMNALSPcod \\
RA-AL & \imgRAALMCLF & \imgRAALMclfWOP & RA & \imgRAALFIXref & \imgRAALSACref & \imgRAALPSOref & \imgRAALSPref \\
---& ---& ---& AL & \imgRAALFIXcod & \imgRAALSACcod & \imgRAALPSOcod & \imgRAALSPcod \\
\noalign{\smallskip}
\textbf{Dots}&&&&&&&\\
\hline\noalign{\smallskip}
Comp & MC & w/oP & Coder & Fix & Sac & PSO & SP \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN-RA & \dotsMNRAMCLF & \dotsMNRAMclfWOP & MN & \dotsMNRAFIXref & \dotsMNRASACref & \dotsMNRAPSOref & \dotsMNRASPref \\
--- & --- & --- & RA & \dotsMNRAFIXcod & \dotsMNRASACcod & \dotsMNRAPSOcod & \dotsMNRASPcod \\
MN-AL & \dotsMNALMCLF & \dotsMNALMclfWOP & MN & \dotsMNALFIXref & \dotsMNALSACref & \dotsMNALPSOref & \dotsMNALSPref \\
--- & --- & --- & AL & \dotsMNALFIXcod & \dotsMNALSACcod & \dotsMNALPSOcod & \dotsMNALSPcod\\
RA-AL & \dotsRAALMCLF & \dotsRAALMclfWOP & RA & \dotsRAALFIXref & \dotsRAALSACref & \dotsRAALPSOref & \dotsRAALSPref \\
---& ---& ---& AL & \dotsRAALFIXcod & \dotsRAALSACcod & \dotsRAALPSOcod & \dotsRAALSPcod \\
\noalign{\smallskip}
\textbf{Videos}&&&&&&&\\
\hline\noalign{\smallskip}
Comp & MC & w/oP & Coder & Fix & Sac & PSO & SP \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN-RA & \videoMNRAMCLF & \videoMNRAMclfWOP & MN & \videoMNRAFIXref & \videoMNRASACref & \videoMNRAPSOref & \videoMNRASPref \\
--- & --- & --- & RA & \videoMNRAFIXcod & \videoMNRASACcod & \videoMNRAPSOcod & \videoMNRASPcod \\
MN-AL & \videoMNALMCLF & \videoMNALMclfWOP & MN & \videoMNALFIXref & \videoMNALSACref & \videoMNALPSOref & \videoMNALSPref \\
--- & --- & --- & AL & \videoMNALFIXcod & \videoMNALSACcod & \videoMNALPSOcod & \videoMNALSPcod\\
RA-AL & \videoRAALMCLF & \videoRAALMclfWOP & RA & \videoRAALFIXref & \videoRAALSACref & \videoRAALPSOref & \videoRAALSPref \\
---& ---& ---& AL & \videoRAALFIXcod & \videoRAALSACcod & \videoRAALPSOcod & \videoRAALSPcod \\
\noalign{\smallskip}\hline
\end{tabular}
\end{table}
\fig{conf} shows confusion patterns for a comparison of algorithm
classifications with human labeling and displays the similarity between
classification decisions with Jaccard indices \citep[JI; ][]{jaccard1901etude}.
The JI is bounded in the range [0, 1], with higher values indicating higher similarity.
A value of 0.93 in the upper left cell of the very first matrix in \fig{conf},
for example, indicates that 93\% of the samples labeled as fixation by either
human coder (RA or MN) were assigned this label by both. This index quantifies the
similarity of classifications independently of the values in other cells.
While \remodnav\ does not achieve a
labeling similarity that reaches the human inter-rater agreement, it still
performs well. In particular, the relative magnitude of agreement with each
individual human coder for fixations, saccades, and PSOs, resembles the
agreement between the human coders. Classification of smooth
pursuits is consistent with human labels for the moving dots and videos
categories. However, there is substantial confusion of fixation and pursuit for
the static images. In a real-world application of \remodnav, pursuit classification
could be disabled (by setting a high pursuit velocity threshold) for data from
static images, if the occurrence of pursuit events can be ruled out a priori.
For this evaluation, however, no such intervention was made.
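The sample-based JI underlying \fig{conf} can be expressed compactly as
follows; this is a sketch with an assumed label encoding, not the analysis code
used here.
\begin{verbatim}
# Simplified sketch: sample-based Jaccard index
# for one event category and two label sequences.
import numpy as np

def jaccard(labels_a, labels_b, category):
    a = np.asarray(labels_a) == category
    b = np.asarray(labels_b) == category
    union = np.logical_or(a, b).sum()
    if union == 0:
        return np.nan
    return np.logical_and(a, b).sum() / union
\end{verbatim}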
\begin{figure*}
% Use the relevant command to insert your figure file.
% For example, with the graphicx package use
% TODO make final figure and switch
%\includegraphics[width=1\textwidth]{img/conf_drawing.eps}
\includegraphics[trim=0 0 0 0,clip,width=1\textwidth]{img/confusion_MN_RA.pdf} \\
\includegraphics[trim=0 0 0 6.6mm,clip,width=1\textwidth]{img/confusion_MN_AL.pdf} \\
\includegraphics[trim=0 0 0 6.6mm,clip,width=1\textwidth]{img/confusion_RA_AL.pdf}
% figure caption is below the figure
\caption{Confusion patterns for pairwise eye movement classification
comparison of both human raters \citep[MN and RA; ][]{Andersson2017} and the
\remodnav\ algorithm (AL) for gaze recordings from stimulation with static
images (left column), moving dots (middle column), and video clips (right
column). All matrices present gaze sample based Jaccard indices \citep[JI;
][]{jaccard1901etude}. Consequently, the diagonals depict the fraction of
time points labeled congruently by both raters in relation to the number of
timepoints assigned to a particular event category by any rater.}
% Give a unique label
\label{fig:conf}
\end{figure*}
\begin{table}[tbp]
% table caption is above the table
\caption{Cohen's Kappa reliability between human coders (MN, RA), and \remodnav\ (AL)
with each of the human coders.
}
\label{tab:kappa} % Give a unique label
% For LaTeX tables use
\begin{tabular*}{0.5\textwidth}{c @{\extracolsep{\fill}}llll}
\textbf {Fixations} & & & \\
\hline\noalign{\smallskip}
Comparison & Images & Dots & Videos \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN versus RA & \kappaRAMNimgFix & \kappaRAMNdotsFix & \kappaRAMNvideoFix \\
AL versus RA & \kappaALRAimgFix & \kappaALRAdotsFix & \kappaALRAvideoFix \\
AL versus MN & \kappaALMNimgFix & \kappaALMNdotsFix & \kappaALMNvideoFix \\
\noalign{\smallskip}
\textbf{Saccades} & & & \\
\hline\noalign{\smallskip}
Comparison & Images & Dots & Videos \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN versus RA & \kappaRAMNimgSac & \kappaRAMNdotsSac & \kappaRAMNvideoSac \\
AL versus RA & \kappaALRAimgSac & \kappaALRAdotsSac & \kappaALRAvideoSac \\
AL versus MN & \kappaALMNimgSac & \kappaALMNdotsSac & \kappaALMNvideoSac \\
\noalign{\smallskip}
\textbf{PSOs} & & & \\
\hline\noalign{\smallskip}
Comparison & Images & Dots & Videos \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN versus RA & \kappaRAMNimgPSO & \kappaRAMNdotsPSO & \kappaRAMNvideoPSO \\
AL versus RA & \kappaALRAimgPSO & \kappaALRAdotsPSO & \kappaALRAvideoPSO \\
AL versus MN & \kappaALMNimgPSO & \kappaALMNdotsPSO & \kappaALMNvideoPSO \\
\noalign{\smallskip}\hline
\end{tabular*}
\end{table}
In addition to the confusion analysis and again following \citet{Andersson2017},
we computed Cohen's Kappa \citep{cohen1960coefficient} as an additional measure
of similarity between human and algorithm performance. It quantifies the
sample-by-sample agreement between two ratings following equation \ref{eq:kappa}:
%
\begin{equation}\label{eq:kappa}
K = \frac{P_o - P_c}{1- P_c}
\end{equation}
%
where $P_o$ is the observed proportion of agreement between the ratings, and
$P_c$ is the proportion of chance agreement. A value of $K=1$ indicates perfect
agreement, and $K=0$ indicates chance level agreement.
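For reference, equation \ref{eq:kappa} can be computed from two label sequences
as sketched below; this is an illustrative implementation, not the code used
for the reported analyses.
\begin{verbatim}
# Simplified sketch of Cohen's Kappa for two
# sample-by-sample label sequences.
import numpy as np

def cohen_kappa(a, b):
    a, b = np.asarray(a), np.asarray(b)
    p_o = np.mean(a == b)
    # chance agreement from the raters' marginal
    # label frequencies
    p_c = sum(np.mean(a == c) * np.mean(b == c)
              for c in np.union1d(a, b))
    return (p_o - p_c) / (1 - p_c)
\end{verbatim}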
Table \ref{tab:kappa} displays the resulting values
between the two human experts, and \remodnav\ with each of the experts, for
each stimulus category and the three event types used in \citet{Andersson2017},
namely fixations, saccades, and PSOs (compare to \citet{Andersson2017}, table 7).
For all event types and stimulus categories, \remodnav\ performs on par with or better
than the original NH algorithm, and in many cases on par with or better than the best
of all algorithms evaluated in \citet{Andersson2017} within an event or stimulus type.
In order to further rank the performance of the proposed algorithm with respect
to the ten algorithms studied in \citet{Andersson2017}, we followed their
approach to compute root mean square deviations (RMSD) from human labels for
event duration distribution characteristics (mean and standard deviation of
durations, plus number of events) for each stimulus category (images, dots,
videos) and event type (fixations, saccades, PSOs, pursuits). This measure
represents a scalar distribution dissimilarity score that can be used as an
additional comparison metric of algorithm performance that focuses on overall
number and durations of classified events, instead of sample-by-sample
misclassification. The RMSD measure has a lower bound of $0.0$ (identical to
the average of both human raters), with higher values indicating larger
differences \citep[for detailed information on the calculation of this metric
see][]{Andersson2017}.
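The following sketch formalizes this metric as we understand it from
\citet{Andersson2017}: deviations of an algorithm's duration statistics from
the human average, scaled by the respective maximum of each characteristic,
aggregated as a root mean square, and converted into zero-based ranks. Names
and the exact normalization are illustrative assumptions.
\begin{verbatim}
# Simplified sketch of the RMSD-based ranking.
import numpy as np

def rmsd(alg_stats, human_stats, stat_max):
    # each argument: (mean duration, SD, # events)
    d = ((np.asarray(alg_stats)
          - np.asarray(human_stats))
         / np.asarray(stat_max))
    return np.sqrt(np.mean(d ** 2))

def to_ranks(scores):
    # zero-based ranks: 0 = most human-like
    return np.argsort(np.argsort(scores))
\end{verbatim}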
\tab{rmsd} is modelled after \citet[Tables
3-6]{Andersson2017}, appended with \remodnav, showing RMSD based on the scores of human raters given in the original tables. As
acknowledged by the authors, the absolute value of the RMSD scores is not
informative due to scaling with respect to the respective maximum value of each
characteristic. Therefore, we converted RMSDs for each algorithm and event
type into zero-based ranks (lower is more human-like).
The LNS algorithm \citep{Larsson2013} was found to have the most human-like
performance for saccade and PSO classification in \cite{Andersson2017}. \remodnav\
performs comparably to LNS for both event types (saccades: $2.0$ vs. $3.3$;
PSOs: $2.3$ vs. $2.0$, mean rank across stimulus categories for LNS and \remodnav,
respectively).
Depending on the stimulus type, different algorithms performed best for
fixation classification. NH performed best for images and videos, but worst for
moving dots. \remodnav\ outperforms all other algorithms in the dots category,
and achieves rank 5 and 6 (middle range) for videos and images, respectively.
Across all stimulus and event categories, \remodnav\ achieves a mean ranking
of $2.9$, and a mean ranking of $3.2$ when not taking smooth pursuit into account.
\begin{table*}[p]
% table caption is above the table
\caption{Comparison of event duration statistics (mean, standard deviation, and number
of events) for image, dot, and video
stimuli. This table is modeled after \citet[Tables 3-6]{Andersson2017}, and
root-mean-square-deviations (RMSD) from human raters are shown
for fixations, saccades, PSOs, and pursuit as zero-based ranks (rank zero
is closest to the average of the two human raters). Summary statistics for
all algorithms used in \citet{Andersson2017} were taken from their publicly
available GitHub repository
(github.com/richardandersson/EyeMovementDetectorEvaluation). Cohen's Kappa
was computed for the complete set of algorithms in \citet{Andersson2017} and
\remodnav .}
\label{tab:rmsd} % Give a unique label
% For LaTeX tables use
\begin{small}
\begin{tabular*}{\textwidth}{c @{\extracolsep{\fill}}lllllllllllll}
\multicolumn{13}{l}{\textit{Fixations}}\\
\toprule
& \multicolumn{4}{l}{Images} & \multicolumn{4}{l}{Dots} & \multicolumn{4}{l}{Videos}\\
Algorithm & Mean & SD & \# & rank & Mean & SD & \# & rank & Mean & SD & \# & rank \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN & \FIXimgmnMN & \FIXimgsdMN & \FIXimgnoMN & \rankFIXimgMN & \FIXdotsmnMN & \FIXdotssdMN & \FIXdotsnoMN & \rankFIXdotsMN & \FIXvideomnMN & \FIXvideosdMN & \FIXvideonoMN & \rankFIXvideoMN \\
RA & \FIXimgmnRA & \FIXimgsdRA & \FIXimgnoRA & \rankFIXimgRA & \FIXdotsmnRA & \FIXdotssdRA & \FIXdotsnoRA & \rankFIXdotsRA & \FIXvideomnRA & \FIXvideosdRA & \FIXvideonoRA & \rankFIXvideoRA \\
CDT & \FIXimgmnCDT & \FIXimgsdCDT & \FIXimgnoCDT & \rankFIXimgCDT & \FIXdotsmnCDT & \FIXdotssdCDT & \FIXdotsnoCDT & \rankFIXdotsCDT & \FIXvideomnCDT & \FIXvideosdCDT & \FIXvideonoCDT & \rankFIXvideoCDT \\
EM & - & - & - & - & - & - & - & - & - & - & - & - \\
IDT & \FIXimgmnIDT & \FIXimgsdIDT & \FIXimgnoIDT & \rankFIXimgIDT & \FIXdotsmnIDT & \FIXdotssdIDT & \FIXdotsnoIDT & \rankFIXdotsIDT & \FIXvideomnIDT & \FIXvideosdIDT & \FIXvideonoIDT & \rankFIXvideoIDT \\
IKF & \FIXimgmnIKF & \FIXimgsdIKF & \FIXimgnoIKF & \rankFIXimgIKF & \FIXdotsmnIKF & \FIXdotssdIKF & \FIXdotsnoIKF & \rankFIXdotsIKF & \FIXvideomnIKF & \FIXvideosdIKF & \FIXvideonoIKF & \rankFIXvideoIKF \\
IMST & \FIXimgmnIMST & \FIXimgsdIMST & \FIXimgnoIMST & \rankFIXimgIMST & \FIXdotsmnIMST & \FIXdotssdIMST & \FIXdotsnoIMST & \rankFIXdotsIMST & \FIXvideomnIMST & \FIXvideosdIMST & \FIXvideonoIMST & \rankFIXvideoIMST \\
IHMM & \FIXimgmnIHMM & \FIXimgsdIHMM & \FIXimgnoIHMM & \rankFIXimgIHMM & \FIXdotsmnIHMM & \FIXdotssdIHMM & \FIXdotsnoIHMM & \rankFIXdotsIHMM & \FIXvideomnIHMM & \FIXvideosdIHMM & \FIXvideonoIHMM & \rankFIXvideoIHMM \\
IVT & \FIXimgmnIVT & \FIXimgsdIVT & \FIXimgnoIVT & \rankFIXimgIVT & \FIXdotsmnIVT & \FIXdotssdIVT & \FIXdotsnoIVT & \rankFIXdotsIVT & \FIXvideomnIVT & \FIXvideosdIVT & \FIXvideonoIVT & \rankFIXvideoIVT \\
NH & \FIXimgmnNH & \FIXimgsdNH & \FIXimgnoNH & \rankFIXimgNH & \FIXdotsmnNH & \FIXdotssdNH & \FIXdotsnoNH & \rankFIXdotsNH & \FIXvideomnNH & \FIXvideosdNH & \FIXvideonoNH & \rankFIXvideoNH \\
BIT & \FIXimgmnBIT & \FIXimgsdBIT & \FIXimgnoBIT & \rankFIXimgBIT & \FIXdotsmnBIT & \FIXdotssdBIT & \FIXdotsnoBIT & \rankFIXdotsBIT & \FIXvideomnBIT & \FIXvideosdBIT & \FIXvideonoBIT & \rankFIXvideoBIT \\
LNS & - & - & - & - & - & - & - & - & - & - & - & - \\
\remodnav\ & \FIXimgmnRE & \FIXimgsdRE & \FIXimgnoRE & \rankFIXimgRE & \FIXdotsmnRE & \FIXdotssdRE & \FIXdotsnoRE & \rankFIXdotsRE & \FIXvideomnRE & \FIXvideosdRE & \FIXvideonoRE & \rankFIXvideoRE \\
\noalign{\smallskip}\bottomrule
\\
\multicolumn{13}{l}{\textit{Saccades}}\\
\toprule\noalign{\smallskip}
& \multicolumn{4}{l}{Images} & \multicolumn{4}{l}{Dots} & \multicolumn{4}{l}{Videos}\\
Algorithm & Mean & SD & \# & rank & Mean & SD & \# & rank & Mean & SD & \# & rank \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN & \SACimgmnMN & \SACimgsdMN & \SACimgnoMN & \rankSACimgMN & \SACdotsmnMN & \SACdotssdMN & \SACdotsnoMN & \rankSACdotsMN & \SACvideomnMN & \SACvideosdMN & \SACvideonoMN & \rankSACvideoMN \\
RA & \SACimgmnRA & \SACimgsdRA & \SACimgnoRA & \rankSACimgRA & \SACdotsmnRA & \SACdotssdRA & \SACdotsnoRA & \rankSACdotsRA & \SACvideomnRA & \SACvideosdRA & \SACvideonoRA & \rankSACvideoRA \\
CDT & - & - & - & - & - & - & - & - & - & - & - & - \\
EM & \SACimgmnEM & \SACimgsdEM & \SACimgnoEM & \rankSACimgEM & \SACdotsmnEM & \SACdotssdEM & \SACdotsnoEM & \rankSACdotsEM & \SACvideomnEM & \SACvideosdEM & \SACvideonoEM & \rankSACvideoEM \\
IDT & \SACimgmnIDT & \SACimgsdIDT & \SACimgnoIDT & \rankSACimgIDT & \SACdotsmnIDT & \SACdotssdIDT & \SACdotsnoIDT & \rankSACdotsIDT & \SACvideomnIDT & \SACvideosdIDT & \SACvideonoIDT & \rankSACvideoIDT \\
IKF & \SACimgmnIKF & \SACimgsdIKF & \SACimgnoIKF & \rankSACimgIKF & \SACdotsmnIKF & \SACdotssdIKF & \SACdotsnoIKF & \rankSACdotsIKF & \SACvideomnIKF & \SACvideosdIKF & \SACvideonoIKF & \rankSACvideoIKF \\
IMST & \SACimgmnIMST & \SACimgsdIMST & \SACimgnoIMST & \rankSACimgIMST & \SACdotsmnIMST & \SACdotssdIMST & \SACdotsnoIMST & \rankSACdotsIMST & \SACvideomnIMST & \SACvideosdIMST & \SACvideonoIMST & \rankSACvideoIMST \\
IHMM & \SACimgmnIHMM & \SACimgsdIHMM & \SACimgnoIHMM & \rankSACimgIHMM & \SACdotsmnIHMM & \SACdotssdIHMM & \SACdotsnoIHMM & \rankSACdotsIHMM & \SACvideomnIHMM & \SACvideosdIHMM & \SACvideonoIHMM & \rankSACvideoIHMM \\
IVT & \SACimgmnIVT & \SACimgsdIVT & \SACimgnoIVT & \rankSACimgIVT & \SACdotsmnIVT & \SACdotssdIVT & \SACdotsnoIVT & \rankSACdotsIVT & \SACvideomnIVT & \SACvideosdIVT & \SACvideonoIVT & \rankSACvideoIVT \\
NH & \SACimgmnNH & \SACimgsdNH & \SACimgnoNH & \rankSACimgNH & \SACdotsmnNH & \SACdotssdNH & \SACdotsnoNH & \rankSACdotsNH & \SACvideomnNH & \SACvideosdNH & \SACvideonoNH & \rankSACvideoNH \\
BIT & - & - & - & - & - & - & - & - & - & - & - & - \\
LNS & \SACimgmnLNS & \SACimgsdLNS & \SACimgnoLNS & \rankSACimgLNS & \SACdotsmnLNS & \SACdotssdLNS & \SACdotsnoLNS & \rankSACdotsLNS & \SACvideomnLNS & \SACvideosdLNS & \SACvideonoLNS & \rankSACvideoLNS \\
\remodnav\ & \SACimgmnRE & \SACimgsdRE & \SACimgnoRE & \rankSACimgRE & \SACdotsmnRE & \SACdotssdRE & \SACdotsnoRE & \rankSACdotsRE & \SACvideomnRE & \SACvideosdRE & \SACvideonoRE & \rankSACvideoRE \\
\noalign{\smallskip}\bottomrule
\\
\multicolumn{13}{l}{\textit{Post-saccadic oscillations}}\\
\toprule\noalign{\smallskip}
& \multicolumn{4}{l}{Images} & \multicolumn{4}{l}{Dots} & \multicolumn{4}{l}{Videos}\\
Algorithm & Mean & SD & \# & rank & Mean & SD & \# & rank & Mean & SD & \# & rank \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN & \PSOimgmnMN & \PSOimgsdMN & \PSOimgnoMN & \rankPSOimgMN & \PSOdotsmnMN & \PSOdotssdMN & \PSOdotsnoMN & \rankPSOdotsMN & \PSOvideomnMN & \PSOvideosdMN & \PSOvideonoMN & \rankPSOvideoMN \\
RA & \PSOimgmnRA & \PSOimgsdRA & \PSOimgnoRA & \rankPSOimgRA & \PSOdotsmnRA & \PSOdotssdRA & \PSOdotsnoRA & \rankPSOdotsRA & \PSOvideomnRA & \PSOvideosdRA & \PSOvideonoRA & \rankPSOvideoRA \\
NH & \PSOimgmnNH & \PSOimgsdNH & \PSOimgnoNH & \rankPSOimgNH & \PSOdotsmnNH & \PSOdotssdNH & \PSOdotsnoNH & \rankPSOdotsNH & \PSOvideomnNH & \PSOvideosdNH & \PSOvideonoNH & \rankPSOvideoNH \\
LNS & \PSOimgmnLNS & \PSOimgsdLNS & \PSOimgnoLNS & \rankPSOimgLNS & \PSOdotsmnLNS & \PSOdotssdLNS & \PSOdotsnoLNS & \rankPSOdotsLNS & \PSOvideomnLNS & \PSOvideosdLNS & \PSOvideonoLNS & \rankPSOvideoLNS \\
\remodnav\ & \PSOimgmnRE & \PSOimgsdRE & \PSOimgnoRE & \rankPSOimgRE & \PSOdotsmnRE & \PSOdotssdRE & \PSOdotsnoRE & \rankPSOdotsRE & \PSOvideomnRE & \PSOvideosdRE & \PSOvideonoRE & \rankPSOvideoRE \\
\noalign{\smallskip}\hline
\\
\multicolumn{13}{l}{\textit{Pursuit}}\\
\toprule\noalign{\smallskip}
& \multicolumn{4}{l}{Images} & \multicolumn{4}{l}{Dots} & \multicolumn{4}{l}{Videos}\\
Algorithm & Mean & SD & \# & rank & Mean & SD & \# & rank & Mean & SD & \# & rank \\
\noalign{\smallskip}\hline\noalign{\smallskip}
MN & \PURimgmnMN & \PURimgsdMN & \PURimgnoMN & \rankPURimgMN & \PURdotsmnMN & \PURdotssdMN & \PURdotsnoMN & \rankPURdotsMN & \PURvideomnMN & \PURvideosdMN & \PURvideonoMN & \rankPURvideoMN \\
RA & \PURimgmnRA & \PURimgsdRA & \PURimgnoRA & \rankPURimgRA & \PURdotsmnRA & \PURdotssdRA & \PURdotsnoRA & \rankPURdotsRA & \PURvideomnRA & \PURvideosdRA & \PURvideonoRA & \rankPURvideoRA \\
\remodnav\ & \PURimgmnRE & \PURimgsdRE & \PURimgnoRE & \rankPURimgRE & \PURdotsmnRE & \PURdotssdRE & \PURdotsnoRE & \rankPURdotsRE & \PURvideomnRE & \PURvideosdRE & \PURvideonoRE & \rankPURvideoRE \\
\noalign{\smallskip}\bottomrule
\end{tabular*}
\end{small}