@online{10xgenomics10X2023,
title = {{{10X}}},
author = {{10X Genomics}},
date = {2023},
url = {https://www.10xgenomics.com/},
abstract = {Resolving Biology to Advance Human Health},
langid = {english},
organization = {10x Genomics},
file = {/Users/Nasy/Zotero/storage/S3JUZCYN/www.10xgenomics.com.html}
}
@article{abramsonAccurateStructurePrediction2024,
title = {Accurate Structure Prediction of Biomolecular Interactions with {{AlphaFold}} 3},
author = {Abramson, Josh and Adler, Jonas and Dunger, Jack and Evans, Richard and Green, Tim and Pritzel, Alexander and Ronneberger, Olaf and Willmore, Lindsay and Ballard, Andrew J. and Bambrick, Joshua and Bodenstein, Sebastian W. and Evans, David A. and Hung, Chia-Chun and O’Neill, Michael and Reiman, David and Tunyasuvunakool, Kathryn and Wu, Zachary and Žemgulytė, Akvilė and Arvaniti, Eirini and Beattie, Charles and Bertolli, Ottavia and Bridgland, Alex and Cherepanov, Alexey and Congreve, Miles and Cowen-Rivers, Alexander I. and Cowie, Andrew and Figurnov, Michael and Fuchs, Fabian B. and Gladman, Hannah and Jain, Rishub and Khan, Yousuf A. and Low, Caroline M. R. and Perlin, Kuba and Potapenko, Anna and Savy, Pascal and Singh, Sukhdeep and Stecula, Adrian and Thillaisundaram, Ashok and Tong, Catherine and Yakneen, Sergei and Zhong, Ellen D. and Zielinski, Michal and Žídek, Augustin and Bapst, Victor and Kohli, Pushmeet and Jaderberg, Max and Hassabis, Demis and Jumper, John M.},
date = {2024-06},
journaltitle = {Nature},
volume = {630},
number = {8016},
pages = {493--500},
publisher = {Nature Publishing Group},
issn = {1476-4687},
doi = {10.1038/s41586-024-07487-w},
url = {https://www.nature.com/articles/s41586-024-07487-w},
urldate = {2024-10-30},
abstract = {The introduction of AlphaFold\,2 has spurred a revolution in modelling the structure of proteins and their interactions, enabling a huge range of applications in protein modelling and design. Here we describe our AlphaFold\,3 model with a substantially updated diffusion-based architecture that is capable of predicting the joint structure of complexes including proteins, nucleic acids, small molecules, ions and modified residues. The new AlphaFold model demonstrates substantially improved accuracy over many previous specialized tools: far greater accuracy for protein–ligand interactions compared with state-of-the-art docking tools, much higher accuracy for protein–nucleic acid interactions compared with nucleic-acid-specific predictors and substantially higher antibody–antigen prediction accuracy compared with AlphaFold-Multimer v.2.3. Together, these results show that high-accuracy modelling across biomolecular space is possible within a single unified deep-learning framework.},
langid = {english},
keywords = {Drug discovery,Machine learning,Protein structure predictions,Structural biology},
file = {/Users/Nasy/Zotero/storage/ACEATQ5V/Abramson et al. - 2024 - Accurate structure prediction of biomolecular interactions with AlphaFold 3.pdf}
}
@inproceedings{akibaOptunaNextgenerationHyperparameter2019,
title = {Optuna: {{A Next-generation Hyperparameter Optimization Framework}}},
shorttitle = {Optuna},
booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
author = {Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
date = {2019-07-25},
series = {{{KDD}} '19},
pages = {2623--2631},
publisher = {Association for Computing Machinery},
location = {New York, NY, USA},
doi = {10.1145/3292500.3330701},
url = {https://doi.org/10.1145/3292500.3330701},
urldate = {2023-08-15},
abstract = {The purpose of this study is to introduce new design-criteria for next-generation hyperparameter optimization software. The criteria we propose include (1) define-by-run API that allows users to construct the parameter search space dynamically, (2) efficient implementation of both searching and pruning strategies, and (3) easy-to-setup, versatile architecture that can be deployed for various purposes, ranging from scalable distributed computing to light-weight experiment conducted via interactive interface. In order to prove our point, we will introduce Optuna, an optimization software which is a culmination of our effort in the development of a next generation optimization software. As an optimization software designed with define-by-run principle, Optuna is particularly the first of its kind. We will present the design-techniques that became necessary in the development of the software that meets the above criteria, and demonstrate the power of our new design through experimental results and real world applications. Our software is available under the MIT license (https://github.com/pfnet/optuna/).},
isbn = {978-1-4503-6201-6},
keywords = {Bayesian optimization,black-box optimization,hyperparameter optimization,machine learning system},
file = {/Users/Nasy/Zotero/storage/7FLDVKJX/Akiba et al. - 2019 - Optuna A Next-generation Hyperparameter Optimization Framework.pdf}
}
@inproceedings{akyurekWhatLearningAlgorithm2023,
title = {{{What}} Learning Algorithm Is In-Context Learning? {{Investigations}} with Linear Models},
shorttitle = {{{What}} Learning Algorithm Is In-Context Learning?},
author = {Akyürek, Ekin and Schuurmans, Dale and Andreas, Jacob and Ma, Tengyu and Zhou, Denny},
date = {2023-02-01},
url = {https://openreview.net/forum?id=0g0X4H8yN4I},
abstract = {Neural sequence models, especially transformers, exhibit a remarkable capacity for in-context learning. They can construct new predictors from sequences of labeled examples $(x, f(x))$ presented in the input without further parameter updates. We investigate the hypothesis that transformer-based in-context learners implement standard learning algorithms implicitly, by encoding context-specific parametric models in their hidden representations, and updating these implicit models as new examples appear in the context. Using linear regression as a model problem, we offer three sources of evidence for this hypothesis. First, we prove by construction that transformers can implement learning algorithms for linear models based on gradient descent and closed-form computation of regression parameters. Second, we show that trained in-context learners closely match the predictors computed by gradient descent, ridge regression, and exact least-squares regression, transitioning between different predictors as transformer depth and dataset noise vary. Third, we present preliminary evidence that in-context learners share algorithmic features with these predictors: learners' late layers encode weight vectors and moment matrices. These results suggest that in-context learning is understandable in algorithmic terms, and that (at least in the linear case) learners may work by rediscovering standard estimation algorithms.},
eventtitle = {The {{Eleventh International Conference}} on {{Learning Representations}}},
langid = {english},
file = {/Users/Nasy/Zotero/storage/ZWQ2N4G3/Akyürek et al. - 2023 - What learning algorithm is in-context learning .pdf}
}
@online{anonymousRethinkingRoleDemonstrations2022,
title = {Rethinking the {{Role}} of {{Demonstrations}}: {{What Makes In-Context Learning Work}}?},
shorttitle = {Rethinking the {{Role}} of {{Demonstrations}}},
author = {Anonymous},
date = {2022-04-21},
url = {https://openreview.net/forum?id=cnRGMv-Ak7u},
urldate = {2023-04-09},
abstract = {Large language models (LMs) are able to in-context learn -- perform a new task via inference alone by conditioning on a few input-label pairs (demonstrations) and making predictions for new inputs. However, there has been little understanding of how the model learns and which aspects of the demonstrations contribute to end task performance. In this paper, we show that ground truth demonstrations are in fact not required -- randomly replacing labels in the demonstrations barely hurts performance, consistently over 12 different models including GPT-3. Instead, we find that other aspects of the demonstrations are the key drivers of end task performance, including the fact that they provide a few examples of (1) the label space, (2) the distribution of the input text, and (3) the overall format of the sequence. Together, our analysis provides a new way of understanding how and why in-context learning works, while opening up new questions about how much can be learned from large language models through inference alone.},
langid = {english},
file = {/Users/Nasy/Zotero/storage/TW9Q2VUA/Anonymous - 2022 - Rethinking the Role of Demonstrations What Makes .pdf}
}
@online{AntigenSpecificTCRSignatures,
title = {Antigen-{{Specific TCR Signatures}} of {{Cytomegalovirus Infection}}},
organization = {The Journal of Immunology},
url = {https://www.jimmunol.org/content/202/3/979},
urldate = {2022-03-19},
file = {/Users/Nasy/Zotero/storage/VZCL8MY4/979.html}
}
@article{antunesInterpretingTCellCrossreactivity2017,
title = {Interpreting {{T-Cell Cross-reactivity}} through {{Structure}}: {{Implications}} for {{TCR-Based Cancer Immunotherapy}}},
shorttitle = {Interpreting {{T-Cell Cross-reactivity}} through {{Structure}}},
author = {Antunes, Dinler A. and Rigo, Maurício M. and Freitas, Martiela V. and Mendes, Marcus F. A. and Sinigaglia, Marialva and Lizée, Gregory and Kavraki, Lydia E. and Selin, Liisa K. and Cornberg, Markus and Vieira, Gustavo F.},
date = {2017},
journaltitle = {Frontiers in Immunology},
volume = {8},
issn = {1664-3224},
doi = {10.3389/fimmu.2017.01210},
url = {https://www.frontiersin.org/articles/10.3389/fimmu.2017.01210},
urldate = {2022-08-19},
abstract = {Immunotherapy has become one of the most promising avenues for cancer treatment, making use of the patient’s own immune system to eliminate cancer cells. Clinical trials with T-cell-based immunotherapies have shown dramatic tumor regressions, being effective in multiple cancer types and for many different patients. Unfortunately, this progress was tempered by reports of serious (even fatal) side effects. Such therapies rely on the use of cytotoxic T-cell lymphocytes, an essential part of the adaptive immune system. Cytotoxic T-cells are regularly involved in surveillance and are capable of both eliminating diseased cells and generating protective immunological memory. The specificity of a given T-cell is determined through the structural interaction between the T-cell receptor (TCR) and a peptide-loaded major histocompatibility complex (MHC); i.e., an intracellular peptide–ligand displayed at the cell surface by an MHC molecule. However, a given TCR can recognize different peptide–MHC (pMHC) complexes, which can sometimes trigger an unwanted response that is referred to as T-cell cross-reactivity. This has become a major safety issue in TCR-based immunotherapies, following reports of melanoma-specific T-cells causing cytotoxic damage to healthy tissues (e.g., heart and nervous system). T-cell cross-reactivity has been extensively studied in the context of viral immunology and tissue transplantation. Growing evidence suggests that it is largely driven by structural similarities of seemingly unrelated pMHC complexes. Here, we review recent reports about the existence of pMHC “hot-spots” for cross-reactivity and propose the existence of a TCR interaction profile (i.e., a refinement of a more general TCR footprint in which some amino acid residues are more important than others in triggering T-cell cross-reactivity). We also make use of available structural data and pMHC models to interpret previously reported cross-reactivity patterns among virus-derived peptides. Our study provides further evidence that structural analyses of pMHC complexes can be used to assess the intrinsic likelihood of cross-reactivity among peptide-targets. Furthermore, we hypothesize that some apparent inconsistencies in reported cross-reactivities, such as a preferential directionality, might also be driven by particular structural features of the targeted pMHC complex. Finally, we explain why TCR-based immunotherapy provides a special context in which meaningful T-cell cross-reactivity predictions can be made.},
file = {/Users/Nasy/Zotero/storage/M6NYJUEH/Antunes et al. - 2017 - Interpreting T-Cell Cross-reactivity through Struc.pdf}
}
@article{arnaudSensitiveIdentificationNeoantigens2022,
title = {Sensitive Identification of Neoantigens and Cognate {{TCRs}} in Human Solid Tumors},
author = {Arnaud, Marion and Chiffelle, Johanna and Genolet, Raphael and Navarro Rodrigo, Blanca and Perez, Marta A. S. and Huber, Florian and Magnin, Morgane and Nguyen-Ngoc, Tu and Guillaume, Philippe and Baumgaertner, Petra and Chong, Chloe and Stevenson, Brian J. and Gfeller, David and Irving, Melita and Speiser, Daniel E. and Schmidt, Julien and Zoete, Vincent and Kandalaft, Lana E. and Bassani-Sternberg, Michal and Bobisse, Sara and Coukos, George and Harari, Alexandre},
date = {2022-05},
journaltitle = {Nature Biotechnology},
volume = {40},
number = {5},
pages = {656--660},
publisher = {Nature Publishing Group},
issn = {1546-1696},
doi = {10.1038/s41587-021-01072-6},
url = {https://www.nature.com/articles/s41587-021-01072-6},
urldate = {2023-08-18},
abstract = {The identification of patient-specific tumor antigens is complicated by the low frequency of T cells specific for each tumor antigen. Here we describe NeoScreen, a method that enables the sensitive identification of rare tumor (neo)antigens and of cognate T cell receptors (TCRs) expressed by tumor-infiltrating lymphocytes. T cells transduced with tumor antigen-specific TCRs identified by NeoScreen mediate regression of established tumors in patient-derived xenograft mice.},
langid = {english},
keywords = {Cancer immunotherapy,T-cell receptor,Translational research},
file = {/Users/Nasy/Zotero/storage/S6R6BHZ3/Arnaud et al. - 2022 - Sensitive identification of neoantigens and cognate TCRs in human solid tumors.pdf}
}
@article{atchleySolvingProteinSequence2005,
title = {Solving the Protein Sequence Metric Problem},
author = {Atchley, William R. and Zhao, Jieping and Fernandes, Andrew D. and Drüke, Tanja},
date = {2005-05-03},
journaltitle = {Proceedings of the National Academy of Sciences},
volume = {102},
number = {18},
pages = {6395--6400},
publisher = {Proceedings of the National Academy of Sciences},
doi = {10.1073/pnas.0408677102},
url = {https://www.pnas.org/doi/full/10.1073/pnas.0408677102},
urldate = {2022-05-12},
abstract = {Biological sequences are composed of long strings of alphabetic letters rather than arrays of numerical values. Lack of a natural underlying metric for comparing such alphabetic data significantly inhibits sophisticated statistical analyses of sequences, modeling structural and functional aspects of proteins, and related problems. Herein, we use multivariate statistical analyses on almost 500 amino acid attributes to produce a small set of highly interpretable numeric patterns of amino acid variability. These high-dimensional attribute data are summarized by five multidimensional patterns of attribute covariation that reflect polarity, secondary structure, molecular volume, codon diversity, and electrostatic charge. Numerical scores for each amino acid then transform amino acid sequences for statistical analyses. Relationships between transformed data and amino acid substitution matrices show significant associations for polarity and codon diversity scores. Transformed alphabetic data are used in analysis of variance and discriminant analysis to study DNA binding in the basic helix-loop-helix proteins. The transformed scores offer a general solution for analyzing a wide variety of sequence analysis problems.},
annotation = {293 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/FXRKHVBC/Atchley et al. - 2005 - Solving the protein sequence metric problem.pdf}
}
@online{bachmannConstantCurvatureGraph2020,
title = {Constant {{Curvature Graph Convolutional Networks}}},
author = {Bachmann, Gregor and Bécigneul, Gary and Ganea, Octavian-Eugen},
date = {2020-05-19},
eprint = {1911.05076},
eprinttype = {arXiv},
eprintclass = {cs},
doi = {10.48550/arXiv.1911.05076},
url = {http://arxiv.org/abs/1911.05076},
abstract = {Interest has been rising lately towards methods representing data in non-Euclidean spaces, e.g. hyperbolic or spherical, that provide specific inductive biases useful for certain real-world data properties, e.g. scale-free, hierarchical or cyclical. However, the popular graph neural networks are currently limited in modeling data only via Euclidean geometry and associated vector space operations. Here, we bridge this gap by proposing mathematically grounded generalizations of graph convolutional networks (GCN) to (products of) constant curvature spaces. We do this by i) introducing a unified formalism that can interpolate smoothly between all geometries of constant curvature, ii) leveraging gyro-barycentric coordinates that generalize the classic Euclidean concept of the center of mass. Our class of models smoothly recover their Euclidean counterparts when the curvature goes to zero from either side. Empirically, we outperform Euclidean GCNs in the tasks of node classification and distortion minimization for symbolic data exhibiting non-Euclidean behavior, according to their discrete curvature.},
pubstate = {prepublished},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning},
file = {/Users/Nasy/Zotero/storage/ENAHPLJ4/Bachmann et al. - 2020 - Constant Curvature Graph Convolutional Networks.pdf;/Users/Nasy/Zotero/storage/7F3DBCJN/1911.html}
}
@article{baekAccuratePredictionProtein2021,
title = {Accurate Prediction of Protein Structures and Interactions Using a Three-Track Neural Network},
author = {Baek, Minkyung and DiMaio, Frank and Anishchenko, Ivan and Dauparas, Justas and Ovchinnikov, Sergey and Lee, Gyu Rie and Wang, Jue and Cong, Qian and Kinch, Lisa N. and Schaeffer, R. Dustin and Millán, Claudia and Park, Hahnbeom and Adams, Carson and Glassman, Caleb R. and DeGiovanni, Andy and Pereira, Jose H. and Rodrigues, Andria V. and van Dijk, Alberdina A. and Ebrecht, Ana C. and Opperman, Diederik J. and Sagmeister, Theo and Buhlheller, Christoph and Pavkov-Keller, Tea and Rathinaswamy, Manoj K. and Dalwadi, Udit and Yip, Calvin K. and Burke, John E. and Garcia, K. Christopher and Grishin, Nick V. and Adams, Paul D. and Read, Randy J. and Baker, David},
options = {useprefix=true},
date = {2021-08-20},
journaltitle = {Science},
volume = {373},
number = {6557},
pages = {871--876},
issn = {0036-8075, 1095-9203},
doi = {10.1126/science.abj8754},
url = {https://www.science.org/doi/10.1126/science.abj8754},
abstract = {Deep learning takes on protein folding. In 1972, Anfinsen won a Nobel prize for demonstrating a connection between a protein’s amino acid sequence and its three-dimensional structure. Since 1994, scientists have competed in the biannual Critical Assessment of Structure Prediction (CASP) protein-folding challenge. Deep learning methods took center stage at CASP14, with DeepMind’s AlphaFold2 achieving remarkable accuracy. Baek et al. explored network architectures based on the DeepMind framework. They used a three-track network to process sequence, distance, and coordinate information simultaneously and achieved accuracies approaching those of DeepMind. The method, RoseTTAFold, can solve challenging x-ray crystallography and cryo–electron microscopy modeling problems and generate accurate models of protein-protein complexes. —VV. Protein structure modeling enables the rapid solution of protein structures and provides insights into function. DeepMind presented notably accurate predictions at the recent 14th Critical Assessment of Structure Prediction (CASP14) conference. We explored network architectures that incorporate related ideas and obtained the best performance with a three-track network in which information at the one-dimensional (1D) sequence level, the 2D distance map level, and the 3D coordinate level is successively transformed and integrated. The three-track network produces structure predictions with accuracies approaching those of DeepMind in CASP14, enables the rapid solution of challenging x-ray crystallography and cryo–electron microscopy structure modeling problems, and provides insights into the functions of proteins of currently unknown structure. The network also enables rapid generation of accurate protein-protein complex models from sequence information alone, short-circuiting traditional approaches that require modeling of individual subunits followed by docking. We make the method available to the scientific community to speed biological research.},
langid = {english},
annotation = {815 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/4C5HGF4D/Baek et al. - 2021 - Accurate prediction of protein structures and inte.pdf;/Users/Nasy/Zotero/storage/N56PEIFE/abj8754_baek_sm.pdf}
}
@article{bagaevVDJdb2019Database2020,
title = {{{VDJdb}} in 2019: Database Extension, New Analysis Infrastructure and a {{T-cell}} Receptor Motif Compendium},
shorttitle = {{{VDJdb}} in 2019},
author = {Bagaev, Dmitry V. and Vroomans, Renske M. A. and Samir, Jerome and Stervbo, Ulrik and Rius, Cristina and Dolton, Garry and Greenshields-Watson, Alexander and Attaf, Meriem and Egorov, Evgeny S. and Zvyagin, Ivan V. and Babel, Nina and Cole, David K. and Godkin, Andrew J. and Sewell, Andrew K. and Kesmir, Can and Chudakov, Dmitriy M. and Luciani, Fabio and Shugay, Mikhail},
date = {2020-01-08},
journaltitle = {Nucleic Acids Research},
volume = {48},
number = {D1},
eprint = {31588507},
eprinttype = {pmid},
pages = {D1057--D1062},
issn = {1362-4962},
doi = {10.1093/nar/gkz874},
abstract = {Here, we report an update of the VDJdb database with a substantial increase in the number of T-cell receptor (TCR) sequences and their cognate antigens. The update further provides a new database infrastructure featuring two additional analysis modes that facilitate database querying and real-world data analysis. The increased yield of TCR specificity identification methods and the overall increase in the number of studies in the field has allowed us to expand the database more than 5-fold. Furthermore, several new analysis methods are included. For example, batch annotation of TCR repertoire sequencing samples allows for annotating large datasets on-line. Using recently developed bioinformatic methods for TCR motif mining, we have built a reduced set of high-quality TCR motifs that can be used for both training TCR specificity predictors and matching against TCRs of interest. These additions enhance the versatility of the VDJdb in the task of exploring T-cell antigen specificities. The database is available at https://vdjdb.cdr3.net.},
langid = {english},
pmcid = {PMC6943061},
keywords = {Amino Acid Sequence,Computational Biology,Databases Genetic,High-Throughput Nucleotide Sequencing,Humans,Nucleotide Motifs,Position-Specific Scoring Matrices,Receptors Antigen T-Cell,Sequence Analysis DNA,Software,V(D)J Recombination,Web Browser},
annotation = {139 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/E8CLVMGU/Bagaev et al. - 2020 - VDJdb in 2019 database extension, new analysis in.pdf}
}
@inproceedings{balntasLearningLocalFeature2016,
title = {Learning Local Feature Descriptors with Triplets and Shallow Convolutional Neural Networks},
booktitle = {Proceedings of the {{British Machine Vision Conference}} 2016},
author = {Balntas, Vassileios and Riba, Edgar and Ponsa, Daniel and Mikolajczyk, Krystian},
date = {2016},
pages = {119.1--119.11},
publisher = {British Machine Vision Association},
location = {York, UK},
doi = {10.5244/C.30.119},
url = {http://www.bmva.org/bmvc/2016/papers/paper119/index.html},
urldate = {2022-05-12},
eventtitle = {British {{Machine Vision Conference}} 2016},
isbn = {978-1-901725-59-9},
langid = {english},
keywords = {nosource},
annotation = {122 citations (Crossref) [2022-08-03]}
}
@online{baltusConvolutionalNeuralNetwork2022,
title = {Convolutional Neural Network for Gravitational-Wave Early Alert: {{Going}} down in Frequency},
shorttitle = {Convolutional Neural Network for Gravitational-Wave Early Alert},
author = {Baltus, Grégory and Janquart, Justin and Lopez, Melissa and Narola, Harsh and Cudell, Jean-René},
date = {2022-05-10},
eprint = {2205.04750},
eprinttype = {arXiv},
eprintclass = {gr-qc},
url = {http://arxiv.org/abs/2205.04750},
urldate = {2022-05-11},
abstract = {We present here the latest development of a machine-learning pipeline for pre-merger alerts from gravitational waves coming from binary neutron stars. This work starts from the convolutional neural networks introduced in our previous paper (PhysRevD.103.102003) that searched for three classes of early inspirals in simulated Gaussian noise colored with the design-sensitivity power-spectral density of LIGO. Our new network is able to search for any type of binary neutron stars, it can take into account all the detectors available, and it can see the events even earlier than the previous one. We study the performance of our method in three different types of noise: Gaussian O3 noise, real O3 noise, and predicted O4 noise. We show that our network performs almost as well in non-Gaussian noise as in Gaussian noise: our method is robust w.r.t. glitches and artifacts present in real noise. Although it would not have been able to trigger on the BNSs detected during O3 because their signal-to-noise ratio was too weak, we expect our network to find around 3 BNSs during O4 with a time before the merger between 3 and 88 s in advance.},
keywords = {General Relativity and Quantum Cosmology},
file = {/Users/Nasy/Zotero/storage/IWWT7H5B/Baltus et al. - 2022 - Convolutional neural network for gravitational-wav.pdf;/Users/Nasy/Zotero/storage/KBX3UDA4/2205.html}
}
@online{bankAutoencoders2021,
title = {Autoencoders},
author = {Bank, Dor and Koenigstein, Noam and Giryes, Raja},
date = {2021-04-03},
eprint = {2003.05991},
eprinttype = {arXiv},
eprintclass = {cs, stat},
url = {http://arxiv.org/abs/2003.05991},
abstract = {An autoencoder is a specific type of a neural network, which is mainly designed to encode the input into a compressed and meaningful representation, and then decode it back such that the reconstructed input is as similar as possible to the original one. This chapter surveys the different types of autoencoders that are mainly used today. It also describes various applications and use-cases of autoencoders.},
keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Statistics - Machine Learning},
file = {/Users/Nasy/Zotero/storage/INB7XMFW/Bank et al. - 2021 - Autoencoders.pdf;/Users/Nasy/Zotero/storage/DNE553YC/2003.html}
}
@article{bayleyRobustMachineLearning2020,
title = {Robust Machine Learning Algorithm to Search for Continuous Gravitational Waves},
author = {Bayley, Joe and Messenger, Chris and Woan, Graham},
date = {2020-10-21},
journaltitle = {Physical Review D},
volume = {102},
number = {8},
pages = {083024},
publisher = {American Physical Society},
doi = {10.1103/PhysRevD.102.083024},
url = {https://link.aps.org/doi/10.1103/PhysRevD.102.083024},
urldate = {2021-04-29},
abstract = {Many continuous gravitational wave searches are affected by instrumental spectral lines that could be confused with a continuous astrophysical signal. Several techniques have been developed to limit the effect of these lines by penalizing signals that appear in only a single detector. We have developed a general method, using a convolutional neural network, to reduce the impact of instrumental artifacts on searches that use the SOAP algorithm Bayley et al. [Phys. Rev. D 100, 023006 (2019)]. The method can identify features in corresponding frequency bands of each detector and classify these bands as containing a signal, an instrumental line, or noise. We tested the method against four different datasets: Gaussian noise with time gaps, data from the final run of Initial LIGO (S6) with signals added, the reference S6 mock data challenge dataset Walsh et al. [Phys. Rev. D 94, 124010 (2016)] and signals injected into data from the second advanced LIGO observing run (O2). Using the S6 mock data challenge dataset and at a 1\% false alarm probability we showed that at 95\% efficiency a fully automated SOAP search has a sensitivity corresponding to a coherent signal-to-noise ratio of 110, equivalent to a sensitivity depth of 10 Hz$^{-1/2}$, making this automated search competitive with other searches requiring significantly more computing resources and human intervention.},
annotation = {8 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/FBGYXIBY/Bayley et al. - 2020 - Robust machine learning algorithm to search for co.pdf;/Users/Nasy/Zotero/storage/6RZYY7ZT/PhysRevD.102.html}
}
@book{benedettiLecturesHyperbolicGeometry1992,
title = {Lectures on {{Hyperbolic Geometry}}},
author = {Benedetti, Riccardo and Petronio, Carlo},
date = {1992},
series = {Universitext},
publisher = {Springer},
location = {Berlin, Heidelberg},
doi = {10.1007/978-3-642-58158-8},
url = {http://link.springer.com/10.1007/978-3-642-58158-8},
urldate = {2023-03-10},
isbn = {978-3-540-55534-6 978-3-642-58158-8},
keywords = {Cohomology,Flat Fiber Bundles,Geometry of Manifolds,Hyperbolic Geometry,manifold},
file = {/Users/Nasy/Zotero/storage/RCMS5X86/Benedetti and Petronio - 1992 - Lectures on Hyperbolic Geometry.pdf}
}
@online{biPanguWeather3DHighResolution2022,
title = {Pangu-{{Weather}}: {{A 3D High-Resolution Model}} for {{Fast}} and {{Accurate Global Weather Forecast}}},
shorttitle = {Pangu-{{Weather}}},
author = {Bi, Kaifeng and Xie, Lingxi and Zhang, Hengheng and Chen, Xin and Gu, Xiaotao and Tian, Qi},
date = {2022-11-03},
eprint = {2211.02556},
eprinttype = {arXiv},
eprintclass = {physics},
doi = {10.48550/arXiv.2211.02556},
url = {http://arxiv.org/abs/2211.02556},
urldate = {2022-11-11},
abstract = {In this paper, we present Pangu-Weather, a deep learning based system for fast and accurate global weather forecast. For this purpose, we establish a data-driven environment by downloading 43 years of hourly global weather data from the 5th generation of ECMWF reanalysis (ERA5) data and train a few deep neural networks with about 256 million parameters in total. The spatial resolution of forecast is $0.25^{\circ}\times0.25^{\circ}$, comparable to the ECMWF Integrated Forecast Systems (IFS). More importantly, for the first time, an AI-based method outperforms state-of-the-art numerical weather prediction (NWP) methods in terms of accuracy (latitude-weighted RMSE and ACC) of all factors (e.g., geopotential, specific humidity, wind speed, temperature, etc.) and in all time ranges (from one hour to one week). There are two key strategies to improve the prediction accuracy: (i) designing a 3D Earth Specific Transformer (3DEST) architecture that formulates the height (pressure level) information into cubic data, and (ii) applying a hierarchical temporal aggregation algorithm to alleviate cumulative forecast errors. In deterministic forecast, Pangu-Weather shows great advantages for short to medium-range forecast (i.e., forecast time ranges from one hour to one week). Pangu-Weather supports a wide range of downstream forecast scenarios, including extreme weather forecast (e.g., tropical cyclone tracking) and large-member ensemble forecast in real-time. Pangu-Weather not only ends the debate on whether AI-based methods can surpass conventional NWP methods, but also reveals novel directions for improving deep learning weather forecast systems.},
pubstate = {prepublished},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Physics - Atmospheric and Oceanic Physics},
file = {/Users/Nasy/Zotero/storage/TGACLEGP/Bi et al. - 2022 - Pangu-Weather A 3D High-Resolution Model for Fast.pdf;/Users/Nasy/Zotero/storage/NSSVVBNE/2211.html}
}
@online{blochUniversalModelHyperbolic0100,
title = {A {{Universal Model}} for {{Hyperbolic}}, {{Euclidean}} and {{Spherical Geometries}}},
author = {Bloch, Andreas},
url = {http://andbloch.github.io/K-Stereographic-Model/},
urldate = {2023-03-24},
abstract = {This blogpost presents a geometric model that harnesses the formalism of gyrovector spaces in order to capture all three geometries of constant curvature at once. Furthermore, the presented model allows one to smoothly interpolate between different curvatures in order to learn the curvature of spaces jointly with the embeddings.},
organization = {Andreas Bloch}
}
@article{bonnabelStochasticGradientDescent2013,
title = {Stochastic {{Gradient Descent}} on {{Riemannian Manifolds}}},
author = {Bonnabel, Silvère},
date = {2013-09},
journaltitle = {IEEE Transactions on Automatic Control},
volume = {58},
number = {9},
pages = {2217--2229},
issn = {1558-2523},
doi = {10.1109/TAC.2013.2254619},
abstract = {Stochastic gradient descent is a simple approach to find the local minima of a cost function whose evaluations are corrupted by noise. In this paper, we develop a procedure extending stochastic gradient descent algorithms to the case where the function is defined on a Riemannian manifold. We prove that, as in the Euclidean case, the gradient descent algorithm converges to a critical point of the cost function. The algorithm has numerous potential applications, and is illustrated here by four examples. In particular a novel gossip algorithm on the set of covariance matrices is derived and tested numerically.},
eventtitle = {{{IEEE Transactions}} on {{Automatic Control}}},
keywords = {Approximation methods,Convergence,Cost function,Covariance matrices,Manifolds,Nonlinear identification,Riemannian geometry,Standards,stochastic approximation,Trajectory},
annotation = {170 citations (Crossref) [2023-03-09]},
file = {/Users/Nasy/Zotero/storage/VJ5CADFZ/Bonnabel - 2013 - Stochastic Gradient Descent on Riemannian Manifold.pdf;/Users/Nasy/Zotero/storage/G7Z8YY8Y/6487381.html}
}
@article{bradleyStructurebasedPredictionCell2023,
title = {Structure-Based Prediction of {{T}} Cell Receptor:Peptide-{{MHC}} Interactions},
shorttitle = {Structure-Based Prediction of {{T}} Cell Receptor},
author = {Bradley, Philip},
date = {2023-01-20},
journaltitle = {eLife},
volume = {12},
eprint = {36661395},
eprinttype = {pmid},
pages = {e82813},
issn = {2050-084X},
doi = {10.7554/eLife.82813},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9859041/},
abstract = {The regulatory and effector functions of T cells are initiated by the binding of their cell-surface T cell receptor (TCR) to peptides presented by major histocompatibility complex (MHC) proteins on other cells. The specificity of TCR:peptide-MHC interactions, thus, underlies nearly all adaptive immune responses. Despite intense interest, generalizable predictive models of TCR:peptide-MHC specificity remain out of reach; two key barriers are the diversity of TCR recognition modes and the paucity of training data. Inspired by recent breakthroughs in protein structure prediction achieved by deep neural networks, we evaluated structural modeling as a potential avenue for prediction of TCR epitope specificity. We show that a specialized version of the neural network predictor AlphaFold can generate models of TCR:peptide-MHC interactions that can be used to discriminate correct from incorrect peptide epitopes with substantial accuracy. Although much work remains to be done for these predictions to have widespread practical utility, we are optimistic that deep learning-based structural modeling represents a path to generalizable prediction of TCR:peptide-MHC interaction specificity.},
pmcid = {PMC9859041},
file = {/Users/Nasy/Zotero/storage/EB7SX9BA/Bradley - Structure-based prediction of T cell receptorpeptide-MHC interactions.pdf}
}
@article{brahmaWhyDeepLearning2016,
title = {Why {{Deep Learning Works}}: {{A Manifold Disentanglement Perspective}}},
shorttitle = {Why {{Deep Learning Works}}},
author = {Brahma, Pratik Prabhanjan and Wu, Dapeng and She, Yiyuan},
date = {2016-10},
journaltitle = {IEEE Transactions on Neural Networks and Learning Systems},
volume = {27},
number = {10},
pages = {1997--2008},
issn = {2162-2388},
doi = {10.1109/TNNLS.2015.2496947},
abstract = {Deep hierarchical representations of the data have been found out to provide better informative features for several machine learning applications. In addition, multilayer neural networks surprisingly tend to achieve better performance when they are subject to an unsupervised pretraining. The booming of deep learning motivates researchers to identify the factors that contribute to its success. One possible reason identified is the flattening of manifold-shaped data in higher layers of neural networks. However, it is not clear how to measure the flattening of such manifold-shaped data and what amount of flattening a deep neural network can achieve. For the first time, this paper provides quantitative evidence to validate the flattening hypothesis. To achieve this, we propose a few quantities for measuring manifold entanglement under certain assumptions and conduct experiments with both synthetic and real-world data. Our experimental results validate the proposition and lead to new insights on deep learning.},
eventtitle = {{{IEEE Transactions}} on {{Neural Networks}} and {{Learning Systems}}},
keywords = {Data models,Deep learning,disentanglement,Kernel,Machine learning,manifold learning,Manifolds,Neural networks,Nonhomogeneous media,Principal component analysis,unsupervised feature transformation},
file = {/Users/Nasy/Zotero/storage/EEUYBAUP/7348689.html}
}
@article{brandesProteinBERTUniversalDeeplearning2022,
title = {{{ProteinBERT}}: A Universal Deep-Learning Model of Protein Sequence and Function},
shorttitle = {{{ProteinBERT}}},
author = {Brandes, Nadav and Ofer, Dan and Peleg, Yam and Rappoport, Nadav and Linial, Michal},
date = {2022-04-12},
journaltitle = {Bioinformatics},
volume = {38},
number = {8},
pages = {2102--2110},
issn = {1367-4803},
doi = {10.1093/bioinformatics/btac020},
url = {https://doi.org/10.1093/bioinformatics/btac020},
urldate = {2024-10-30},
abstract = {Self-supervised deep language modeling has shown unprecedented success across natural language tasks, and has recently been repurposed to biological sequences. However, existing models and pretraining methods are designed and optimized for text analysis. We introduce ProteinBERT, a deep language model specifically designed for proteins. Our pretraining scheme combines language modeling with a novel task of Gene Ontology (GO) annotation prediction. We introduce novel architectural elements that make the model highly efficient and flexible to long sequences. The architecture of ProteinBERT consists of both local and global representations, allowing end-to-end processing of these types of inputs and outputs. ProteinBERT obtains near state-of-the-art performance, and sometimes exceeds it, on multiple benchmarks covering diverse protein properties (including protein structure, post-translational modifications and biophysical attributes), despite using a far smaller and faster model than competing deep-learning methods. Overall, ProteinBERT provides an efficient framework for rapidly training protein predictors, even with limited labeled data.Code and pretrained model weights are available at https://github.com/nadavbra/protein\_bert.Supplementary data are available at Bioinformatics online.},
file = {/Users/Nasy/Zotero/storage/RDGJZUPR/Brandes et al. - 2022 - ProteinBERT a universal deep-learning model of protein sequence and function.pdf;/Users/Nasy/Zotero/storage/6Q8TN74K/6502274.html}
}
@inproceedings{brandstetterMessagePassingNeural2022,
title = {Message {{Passing Neural PDE Solvers}}},
author = {Brandstetter, Johannes and Worrall, Daniel E. and Welling, Max},
date = {2022-05-08},
url = {https://openreview.net/forum?id=vSix3HPYKSU},
abstract = {The numerical solution of partial differential equations (PDEs) is difficult, having led to a century of research so far. Recently, there have been pushes to build neural--numerical hybrid solvers, which piggy-backs the modern trend towards fully end-to-end learned systems. Most works so far can only generalize over a subset of properties to which a generic solver would be faced, including: resolution, topology, geometry, boundary conditions, domain discretization regularity, dimensionality, etc. In this work, we build a solver, satisfying these properties, where all the components are based on neural message passing, replacing all heuristically designed components in the computation graph with backprop-optimized neural function approximators. We show that neural message passing solvers representationally contain some classical methods, such as finite differences, finite volumes, and WENO schemes. In order to encourage stability in training autoregressive models, we put forward a method that is based on the principle of zero-stability, posing stability as a domain adaptation problem. We validate our method on various fluid-like flow problems, demonstrating fast, stable, and accurate performance across different domain topologies, discretization, etc. in 1D and 2D. Our model outperforms state-of-the-art numerical solvers in the low resolution regime in terms of speed, and accuracy.},
eventtitle = {International {{Conference}} on {{Learning Representations}}},
langid = {english},
file = {/Users/Nasy/Zotero/storage/2JQ5CA7C/Brandstetter et al. - 2022 - Message Passing Neural PDE Solvers.pdf;/Users/Nasy/Zotero/storage/VI987Y2L/forum.html}
}
@article{camachoBLASTArchitectureApplications2009,
title = {{{BLAST}}+: Architecture and Applications},
shorttitle = {{{BLAST}}+},
author = {Camacho, Christiam and Coulouris, George and Avagyan, Vahram and Ma, Ning and Papadopoulos, Jason and Bealer, Kevin and Madden, Thomas L.},
date = {2009-12-15},
journaltitle = {BMC Bioinformatics},
volume = {10},
number = {1},
pages = {421},
issn = {1471-2105},
doi = {10.1186/1471-2105-10-421},
url = {https://doi.org/10.1186/1471-2105-10-421},
abstract = {Sequence similarity searching is a very important bioinformatics task. While Basic Local Alignment Search Tool (BLAST) outperforms exact methods through its use of heuristics, the speed of the current BLAST software is suboptimal for very long queries or database sequences. There are also some shortcomings in the user-interface of the current command-line applications.},
keywords = {Abstract Data Type,Basic Local Alignment Search Tool,Basic Local Alignment Search Tool Search,Lookup Table,Short Read Archive},
annotation = {10045 citations (Crossref) [2022-09-14]},
file = {/Users/Nasy/Zotero/storage/5WNA8BC7/Camacho et al. - 2009 - BLAST+ architecture and applications.pdf;/Users/Nasy/Zotero/storage/TPRBZ2EL/1471-2105-10-421.html}
}
@online{CBrainDeepLearning,
title = {C-{{Brain}}: {{A}} Deep Learning Accelerator That Tames the Diversity of {{CNNs}} through Adaptive Data-Level Parallelization},
organization = {IEEE Xplore},
url = {https://ieeexplore.ieee.org/document/7544365},
urldate = {2022-03-29},
file = {/Users/Nasy/Zotero/storage/H27EK3XF/7544365.html}
}
@inproceedings{cenRepresentationLearningAttributed2019,
title = {Representation {{Learning}} for {{Attributed Multiplex Heterogeneous Network}}},
booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
author = {Cen, Yukuo and Zou, Xu and Zhang, Jianwei and Yang, Hongxia and Zhou, Jingren and Tang, Jie},
date = {2019-07-25},
eprint = {1905.01669},
eprinttype = {arXiv},
eprintclass = {cs},
series = {{{KDD}} '19},
pages = {1358--1368},
publisher = {Association for Computing Machinery},
location = {New York, NY, USA},
doi = {10.1145/3292500.3330964},
url = {http://arxiv.org/abs/1905.01669},
urldate = {2022-05-30},
abstract = {Network embedding (or graph embedding) has been widely used in many real-world applications. However, existing methods mainly focus on networks with single-typed nodes/edges and cannot scale well to handle large networks. Many real-world networks consist of billions of nodes and edges of multiple types, and each node is associated with different attributes. In this paper, we formalize the problem of embedding learning for the Attributed Multiplex Heterogeneous Network and propose a unified framework to address this problem. The framework supports both transductive and inductive learning. We also give the theoretical analysis of the proposed framework, showing its connection with previous works and proving its better expressiveness. We conduct systematical evaluations for the proposed framework on four different genres of challenging datasets: Amazon, YouTube, Twitter, and Alibaba. Experimental results demonstrate that with the learned embeddings from the proposed framework, we can achieve statistically significant improvements (e.g., 5.99-28.23\% lift by F1 scores; p{$<<$}0.01, t-test) over previous state-of-the-art methods for link prediction. The framework has also been successfully deployed on the recommendation system of a worldwide leading e-commerce company, Alibaba Group. Results of the offline A/B tests on product recommendation further confirm the effectiveness and efficiency of the framework in practice.},
isbn = {978-1-4503-6201-6},
keywords = {Computer Science - Machine Learning,Computer Science - Social and Information Networks,heterogeneous network,multiplex network,network embedding},
annotation = {130 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/WTF9DY8Y/Cen et al. - 2019 - Representation Learning for Attributed Multiplex H.pdf;/Users/Nasy/Zotero/storage/4QKNK4D7/1905.html}
}
@online{chamberlainNeuralEmbeddingsGraphs2017,
title = {Neural {{Embeddings}} of {{Graphs}} in {{Hyperbolic Space}}},
author = {Chamberlain, Benjamin Paul and Clough, James and Deisenroth, Marc Peter},
date = {2017-05-29},
eprint = {1705.10359},
eprinttype = {arXiv},
eprintclass = {cs, stat},
doi = {10.48550/arXiv.1705.10359},
url = {http://arxiv.org/abs/1705.10359},
urldate = {2023-02-02},
abstract = {Neural embeddings have been used with great success in Natural Language Processing (NLP). They provide compact representations that encapsulate word similarity and attain state-of-the-art performance in a range of linguistic tasks. The success of neural embeddings has prompted significant amounts of research into applications in domains other than language. One such domain is graph-structured data, where embeddings of vertices can be learned that encapsulate vertex similarity and improve performance on tasks including edge prediction and vertex labelling. For both NLP and graph based tasks, embeddings have been learned in high-dimensional Euclidean spaces. However, recent work has shown that the appropriate isometric space for embedding complex networks is not the flat Euclidean space, but negatively curved, hyperbolic space. We present a new concept that exploits these recent insights and propose learning neural embeddings of graphs in hyperbolic space. We provide experimental evidence that embedding graphs in their natural geometry significantly improves performance on downstream tasks for several real-world public datasets.},
pubstate = {prepublished},
keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
file = {/Users/Nasy/Zotero/storage/DDSF2KU5/Chamberlain et al. - 2017 - Neural Embeddings of Graphs in Hyperbolic Space.pdf;/Users/Nasy/Zotero/storage/K2PHFPEW/1705.html}
}
@online{chamiHyperbolicGraphConvolutional2019,
title = {Hyperbolic {{Graph Convolutional Neural Networks}}},
author = {Chami, Ines and Ying, Rex and Ré, Christopher and Leskovec, Jure},
date = {2019-10-28},
eprint = {1910.12933},
eprinttype = {arXiv},
eprintclass = {cs, stat},
doi = {10.48550/arXiv.1910.12933},
url = {http://arxiv.org/abs/1910.12933},
urldate = {2023-02-02},
abstract = {Graph convolutional neural networks (GCNs) embed nodes in a graph into Euclidean space, which has been shown to incur a large distortion when embedding real-world graphs with scale-free or hierarchical structure. Hyperbolic geometry offers an exciting alternative, as it enables embeddings with much smaller distortion. However, extending GCNs to hyperbolic geometry presents several unique challenges because it is not clear how to define neural network operations, such as feature transformation and aggregation, in hyperbolic space. Furthermore, since input features are often Euclidean, it is unclear how to transform the features into hyperbolic embeddings with the right amount of curvature. Here we propose Hyperbolic Graph Convolutional Neural Network (HGCN), the first inductive hyperbolic GCN that leverages both the expressiveness of GCNs and hyperbolic geometry to learn inductive node representations for hierarchical and scale-free graphs. We derive GCN operations in the hyperboloid model of hyperbolic space and map Euclidean input features to embeddings in hyperbolic spaces with different trainable curvature at each layer. Experiments demonstrate that HGCN learns embeddings that preserve hierarchical structure, and leads to improved performance when compared to Euclidean analogs, even with very low dimensional embeddings: compared to state-of-the-art GCNs, HGCN achieves an error reduction of up to 63.1\% in ROC AUC for link prediction and of up to 47.5\% in F1 score for node classification, also improving state-of-the art on the Pubmed dataset.},
pubstate = {prepublished},
keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
file = {/Users/Nasy/Zotero/storage/YSREMQL9/Chami et al. - 2019 - Hyperbolic Graph Convolutional Neural Networks.pdf;/Users/Nasy/Zotero/storage/PIMRR6YC/1910.html}
}
@article{chenAeSpTVAdaptiveEfficient2020,
title = {{{aeSpTV}}: {{An Adaptive}} and {{Efficient Framework}} for {{Sparse Tensor-Vector Product Kernel}} on a {{High-Performance Computing Platform}}},
shorttitle = {{{aeSpTV}}},
author = {Chen, Yuedan and Xiao, Guoqing and Özsu, M. Tamer and Liu, Chubo and Zomaya, Albert Y. and Li, Tao},
date = {2020-10},
journaltitle = {IEEE Transactions on Parallel and Distributed Systems},
volume = {31},
number = {10},
pages = {2329--2345},
issn = {1558-2183},
doi = {10.1109/TPDS.2020.2990429},
abstract = {Multi-dimensional, large-scale, and sparse data, which can be neatly represented by sparse tensors, are increasingly used in various applications such as data analysis and machine learning. A high-performance sparse tensor-vector product (SpTV), one of the most fundamental operations of processing sparse tensors, is necessary for improving efficiency of related applications. In this article, we propose aeSpTV, an adaptive and efficient SpTV framework on Sunway TaihuLight supercomputer, to solve several challenges of optimizing SpTV on high-performance computing platforms. First, to map SpTV to Sunway architecture and tame expensive memory access latency and parallel writing conflict due to the intrinsic irregularity of SpTV, we introduce an adaptive SpTV parallelization. Second, to co-execute with the parallelization design while still ensuring high efficiency, we design a sparse tensor data structure named CSSoCR. Third, based on the adaptive SpTV parallelization with the novel tensor data structure, we present an autotuner that chooses the most befitting tensor partitioning method for aeSpTV using the variance analysis theory of mathematical statistics to achieve load balance. Fourth, to further leverage the computing power of Sunway, we propose customized optimizations for aeSpTV. Experimental results show that aeSpTV yields good scalability on both thread-level and process-level parallelism of Sunway. It achieves a maximum GFLOPS of 195.69 on 128 processes. Additionally, it is proved that optimization effects of the partitioning autotuner and optimization techniques are remarkable.},
eventtitle = {{{IEEE Transactions}} on {{Parallel}} and {{Distributed Systems}}},
keywords = {Data structures,Kernel,Optimization,Parallel,Parallel processing,partition,Sparse matrices,sparse tensor data structure,sparse tensor-vector product,Sunway architecture,Tensors},
annotation = {8 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/I4YA3G4J/9078890.html}
}
@article{chenBilayeredParallelTraining2019,
title = {A {{Bi-layered Parallel Training Architecture}} for {{Large-Scale Convolutional Neural Networks}}},
author = {Chen, Jianguo and Li, Kenli and Bilal, Kashif and Zhou, Xu and Li, Keqin and Yu, Philip S.},
date = {2019-05},
journaltitle = {IEEE Transactions on Parallel and Distributed Systems},
volume = {30},
number = {5},
pages = {965--976},
issn = {1558-2183},
doi = {10.1109/TPDS.2018.2877359},
abstract = {Benefitting from large-scale training datasets and the complex training network, Convolutional Neural Networks (CNNs) are widely applied in various fields with high accuracy. However, the training process of CNNs is very time-consuming, where large amounts of training samples and iterative operations are required to obtain high-quality weight parameters. In this paper, we focus on the time-consuming training process of large-scale CNNs and propose a Bi-layered Parallel Training (BPT-CNN) architecture in distributed computing environments. BPT-CNN consists of two main components: (a) an outer-layer parallel training for multiple CNN subnetworks on separate data subsets, and (b) an inner-layer parallel training for each subnetwork. In the outer-layer parallelism, we address critical issues of distributed and parallel computing, including data communication, synchronization, and workload balance. A heterogeneous-aware Incremental Data Partitioning and Allocation (IDPA) strategy is proposed, where large-scale training datasets are partitioned and allocated to the computing nodes in batches according to their computing power. To minimize the synchronization waiting during the global weight update process, an Asynchronous Global Weight Update (AGWU) strategy is proposed. In the inner-layer parallelism, we further accelerate the training process for each CNN subnetwork on each computer, where computation steps of convolutional layer and the local weight training are parallelized based on task-parallelism. We introduce task decomposition and scheduling strategies with the objectives of thread-level load balancing and minimum waiting time for critical paths. Extensive experimental results indicate that the proposed BPT-CNN effectively improves the training performance of CNNs while maintaining the accuracy.},
eventtitle = {{{IEEE Transactions}} on {{Parallel}} and {{Distributed Systems}}},
keywords = {Acceleration,bi-layered parallel computing,Big data,Computational modeling,Computer architecture,convolutional neural networks,deep learning,distributed computing,Distributed computing,Parallel processing,Task analysis,Training},
annotation = {88 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/YCNXLFJM/Chen et al. - 2019 - A Bi-layered Parallel Training Architecture for La.pdf}
}
@article{chenDeepRetrosyntheticReaction2021,
title = {Deep {{Retrosynthetic Reaction Prediction}} Using {{Local Reactivity}} and {{Global Attention}}},
author = {Chen, Shuan and Jung, Yousung},
date = {2021-10-25},
journaltitle = {JACS Au},
volume = {1},
number = {10},
pages = {1612--1620},
doi = {10.1021/jacsau.1c00246},
url = {https://doi.org/10.1021/jacsau.1c00246},
urldate = {2024-07-11},
abstract = {As a fundamental problem in chemistry, retrosynthesis aims at designing reaction pathways and intermediates for a target compound. The goal of artificial intelligence (AI)-aided retrosynthesis is to automate this process by learning from the previous chemical reactions to make new predictions. Although several models have demonstrated their potentials for automated retrosynthesis, there is still a significant need to further enhance the prediction accuracy to a more practical level. Here we propose a local retrosynthesis framework called LocalRetro, motivated by the chemical intuition that the molecular changes occur mostly locally during the chemical reactions. This differs from nearly all existing retrosynthesis methods that suggest reactants based on the global structures of the molecules, often containing fine details not directly relevant to the reactions. This local concept yields local reaction templates involving the atom and bond edits. Because the remote functional groups can also affect the overall reaction path as a secondary aspect, the proposed locally encoded retrosynthesis model is then further refined to account for the nonlocal effects of chemical reaction through a global attention mechanism. Our model shows a promising 89.5 and 99.2\% round-trip accuracy at top-1 and top-5 predictions for the USPTO-50K dataset containing 50\,016 reactions. We further demonstrate the validity of LocalRetro on a large dataset containing 479\,035 reactions (USPTO-MIT) with comparable round-trip top-1 and top-5 accuracy of 87.0 and 97.4\%, respectively. The practical application of the model is also demonstrated by correctly predicting the synthesis pathways of five drug candidate molecules from various literature.},
file = {/Users/Nasy/Zotero/storage/3T4SQFYJ/Chen and Jung - 2021 - Deep Retrosynthetic Reaction Prediction using Local Reactivity and Global Attention.pdf}
}
@online{chenFullyHyperbolicNeural2022,
title = {Fully {{Hyperbolic Neural Networks}}},
author = {Chen, Weize and Han, Xu and Lin, Yankai and Zhao, Hexu and Liu, Zhiyuan and Li, Peng and Sun, Maosong and Zhou, Jie},
date = {2022-03-15},
eprint = {2105.14686},
eprinttype = {arXiv},
eprintclass = {cs},
doi = {10.48550/arXiv.2105.14686},
url = {http://arxiv.org/abs/2105.14686},
urldate = {2023-03-08},
abstract = {Hyperbolic neural networks have shown great potential for modeling complex data. However, existing hyperbolic networks are not completely hyperbolic, as they encode features in a hyperbolic space yet formalize most of their operations in the tangent space (a Euclidean subspace) at the origin of the hyperbolic space. This hybrid method greatly limits the modeling ability of networks. In this paper, we propose a fully hyperbolic framework to build hyperbolic networks based on the Lorentz model by adapting the Lorentz transformations (including boost and rotation) to formalize essential operations of neural networks. Moreover, we also prove that linear transformation in tangent spaces used by existing hyperbolic networks is a relaxation of the Lorentz rotation and does not include the boost, implicitly limiting the capabilities of existing hyperbolic networks. The experimental results on four NLP tasks show that our method has better performance for building both shallow and deep networks. Our code will be released to facilitate follow-up research.},
pubstate = {prepublished},
keywords = {Computer Science - Computation and Language,Computer Science - Machine Learning},
file = {/Users/Nasy/Zotero/storage/M95YSWX9/Chen et al. - 2022 - Fully Hyperbolic Neural Networks.pdf;/Users/Nasy/Zotero/storage/MEU5FT8W/2105.html}
}
@inproceedings{chenHpSpMVHeterogeneousParallel2019,
title = {{{hpSpMV}}: {{A Heterogeneous Parallel Computing Scheme}} for {{SpMV}} on the {{Sunway TaihuLight Supercomputer}}},
shorttitle = {{{hpSpMV}}},
booktitle = {2019 {{IEEE}} 21st {{International Conference}} on {{High Performance Computing}} and {{Communications}}; {{IEEE}} 17th {{International Conference}} on {{Smart City}}; {{IEEE}} 5th {{International Conference}} on {{Data Science}} and {{Systems}} ({{HPCC}}/{{SmartCity}}/{{DSS}})},
author = {Chen, Yuedan and Xiao, Guoqing and Xiao, Zheng and Yang, Wangdong},
date = {2019-08},
pages = {989--995},
doi = {10.1109/HPCC/SmartCity/DSS.2019.00142},
abstract = {Sparse matrix-vector multiplication (SpMV) is one of the most essential algorithms in various applications. This paper designs hpSpMV, a heterogeneous parallel computing scheme for SpMV, on the Sunway TaihuLight. There are three main contributions of the hpSpMV. (1) We propose a heterogeneous parallelization design for the SpMV based on the heterogeneous manycore architecture of the SW26010 of Sunway TaihuLight and the given sparse matrix formats. (2) We analyze the execution time of the proposed heterogeneous parallel SpMV on the Sunway. (3) We propose an auto-tuning framework to set the proper parameter of the heterogeneous parallel SpMV based on the execution time analysis on the Sunway. We test the hpSpMV's performance on the Sunway TaihuLight, the result analysis indicates that the hpSpMV has obvious performance improvement and good scalability on the Sunway TaihuLight.},
eventtitle = {2019 {{IEEE}} 21st {{International Conference}} on {{High Performance Computing}} and {{Communications}}; {{IEEE}} 17th {{International Conference}} on {{Smart City}}; {{IEEE}} 5th {{International Conference}} on {{Data Science}} and {{Systems}} ({{HPCC}}/{{SmartCity}}/{{DSS}})},
keywords = {Computer architecture,Conferences,Heterogeneous,sparse matrix format,sparse matrix-vector multiplication,parallel,Sunway TaihuLight,Kernel,Parallel processing,Program processors,Sparse matrices,Supercomputers},
annotation = {3 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/SWMCNACB/8855593.html}
}
@inproceedings{chenImprovingInContextFewShot2022,
title = {Improving {{In-Context Few-Shot Learning}} via {{Self-Supervised Training}}},
booktitle = {Proceedings of the 2022 {{Conference}} of the {{North American Chapter}} of the {{Association}} for {{Computational Linguistics}}: {{Human Language Technologies}}},
author = {Chen, Mingda and Du, Jingfei and Pasunuru, Ramakanth and Mihaylov, Todor and Iyer, Srini and Stoyanov, Veselin and Kozareva, Zornitsa},
date = {2022-07},
pages = {3558--3573},
publisher = {Association for Computational Linguistics},
location = {Seattle, United States},
doi = {10.18653/v1/2022.naacl-main.260},
url = {https://aclanthology.org/2022.naacl-main.260},
abstract = {Self-supervised pretraining has made few-shot learning possible for many NLP tasks. But the pretraining objectives are not typically adapted specifically for in-context few-shot learning. In this paper, we propose to use self-supervision in an intermediate training stage between pretraining and downstream few-shot usage with the goal to teach the model to perform in-context few shot learning. We propose and evaluate four self-supervised objectives on two benchmarks. We find that the intermediate self-supervision stage produces models that outperform strong baselines. Ablation study shows that several factors affect the downstream performance, such as the amount of training data and the diversity of the self-supervised objectives. Human-annotated cross-task supervision and self-supervision are complementary. Qualitative analysis suggests that the self-supervised-trained models are better at following task requirements.},
eventtitle = {{{NAACL-HLT}} 2022},
annotation = {0 citations (Crossref) [2023-04-09]},
file = {/Users/Nasy/Zotero/storage/4HDLFHX2/Chen et al. - 2022 - Improving In-Context Few-Shot Learning via Self-Su.pdf}
}
@inproceedings{chenPMEProjectedMetric2018,
title = {{{PME}}: {{Projected Metric Embedding}} on {{Heterogeneous Networks}} for {{Link Prediction}}},
shorttitle = {{{PME}}},
booktitle = {Proceedings of the 24th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
author = {Chen, Hongxu and Yin, Hongzhi and Wang, Weiqing and Wang, Hao and Nguyen, Quoc Viet Hung and Li, Xue},
date = {2018-07-19},
series = {{{KDD}} '18},
pages = {1177--1186},
publisher = {Association for Computing Machinery},
location = {New York, NY, USA},
doi = {10.1145/3219819.3219986},
url = {https://doi.org/10.1145/3219819.3219986},
urldate = {2022-06-05},
abstract = {Heterogeneous information network embedding aims to embed heterogeneous information networks (HINs) into low dimensional spaces, in which each vertex is represented as a low-dimensional vector, and both global and local network structures in the original space are preserved. However, most of existing heterogeneous information network embedding models adopt the dot product to measure the proximity in the low dimensional space, and thus they can only preserve the first-order proximity and are insufficient to capture the global structure. Compared with homogeneous information networks, there are multiple types of links (i.e., multiple relations) in HINs, and the link distribution w.r.t relations is highly skewed. To address the above challenging issues, we propose a novel heterogeneous information network embedding model PME based on the metric learning to capture both first-order and second-order proximities in a unified way. To alleviate the potential geometrical inflexibility of existing metric learning approaches, we propose to build object and relation embeddings in separate object space and relation spaces rather than in a common space. Afterwards, we learn embeddings by firstly projecting vertices from object space to corresponding relation space and then calculate the proximity between projected vertices. To overcome the heavy skewness of the link distribution w.r.t relations and avoid ``over-sampling'' or ``under-sampling'' for each relation, we propose a novel loss-aware adaptive sampling approach for the model optimization. Extensive experiments have been conducted on a large-scale HIN dataset, and the experimental results show superiority of our proposed PME model in terms of prediction accuracy and scalability.},
isbn = {978-1-4503-5552-0},
keywords = {heterogeneous network embedding,link prediction},
annotation = {87 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/Y2E2GLGV/Chen et al. - 2018 - PME Projected Metric Embedding on Heterogeneous N.pdf}
}
@inproceedings{chenSemisupervisedUserProfiling2019,
title = {Semi-Supervised {{User Profiling}} with {{Heterogeneous Graph Attention Networks}}},
booktitle = {Proceedings of the {{Twenty-Eighth International Joint Conference}} on {{Artificial Intelligence}}},
author = {Chen, Weijian and Gu, Yulong and Ren, Zhaochun and He, Xiangnan and Xie, Hongtao and Guo, Tong and Yin, Dawei and Zhang, Yongdong},
date = {2019-08},
pages = {2116--2122},
publisher = {International Joint Conferences on Artificial Intelligence Organization},
location = {Macao, China},
doi = {10.24963/ijcai.2019/293},
url = {https://www.ijcai.org/proceedings/2019/293},
urldate = {2022-05-26},
abstract = {Aiming to represent user characteristics and personal interests, the task of user profiling is playing an increasingly important role for many real-world applications, e.g., e-commerce and social networks platforms. By exploiting the data like texts and user behaviors, most existing solutions address user profiling as a classification task, where each user is formulated as an individual data instance. Nevertheless, a user’s profile is not only reflected from her/his affiliated data, but also can be inferred from other users, e.g., the users that have similar copurchase behaviors in e-commerce, the friends in social networks, etc. In this paper, we approach user profiling in a semi-supervised manner, developing a generic solution based on heterogeneous graph learning. On the graph, nodes represent the entities of interest (e.g., users, items, attributes of items, etc.), and edges represent the interactions between entities. Our heterogeneous graph attention networks (HGAT) method learns the representation for each entity by accounting for the graph structure, and exploits the attention mechanism to discriminate the importance of each neighbor entity. Through such a learning scheme, HGAT can leverage both unsupervised information and limited labels of users to build the predictor. Extensive experiments on a real-world e-commerce dataset verify the effectiveness and rationality of our HGAT for user profiling.},
eventtitle = {Twenty-{{Eighth International Joint Conference}} on {{Artificial Intelligence}} \{{{IJCAI-19}}\}},
isbn = {978-0-9992411-4-1},
langid = {english},
annotation = {25 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/T2ZACIS8/Chen et al. - 2019 - Semi-supervised User Profiling with Heterogeneous .pdf}
}
@article{chenSequenceStructuralAnalyses2017,
title = {Sequence and {{Structural Analyses Reveal Distinct}} and {{Highly Diverse Human CD8}}+ {{TCR Repertoires}} to {{Immunodominant Viral Antigens}}},
author = {Chen, Guobing and Yang, Xinbo and Ko, Annette and Sun, Xiaoping and Gao, Mingming and Zhang, Yongqing and Shi, Alvin and Mariuzza, Roy A. and Weng, Nan-Ping},
date = {2017-04-18},
journaltitle = {Cell Rep},
volume = {19},
number = {3},
eprint = {28423320},
eprinttype = {pmid},
pages = {569--583},
issn = {2211-1247},
doi = {10.1016/j.celrep.2017.03.072},
abstract = {A diverse T~cell receptor (TCR) repertoire is essential for controlling viral infections. However, information about TCR repertoires to defined viral antigens is limited. We performed a comprehensive analysis of~CD8+ TCR repertoires for two dominant viral epitopes: pp65$_{495-503}$ (NLV) of cytomegalovirus and M1$_{58-66}$ (GIL) of influenza A virus. The highly individualized repertoires (87-5,533 α or β clonotypes per subject) comprised thousands of unique TCRα and TCRβ sequences and dozens of distinct complementarity determining region (CDR)3α and CDR3β motifs. However, diversity is effectively restricted by preferential V-J combinations, CDR3 lengths, and CDR3α/CDR3β pairings. Structures of two GIL-specific TCRs bound to GIL-HLA-A2 provided a potential explanation for the lower diversity of GIL-specific versus NLV-specific repertoires. These anti-viral TCRs occupied up to 3.4\% of the CD8+ TCRβ repertoire, ensuring broad T~cell responses to single epitopes. Our portrait of two anti-viral TCR repertoires may inform the development of predictors of immune protection.},
langid = {english},
pmcid = {PMC5472051},
keywords = {Adult,Amino Acid Motifs,Amino Acid Sequence,Antibody Affinity,Antigens Viral,CD8 T cells,CD8-Positive T-Lymphocytes,Clone Cells,Complementarity Determining Regions,Consensus Sequence,Cytomegalovirus,HLA-A2 Antigen,human,Humans,Immunodominant Epitopes,Influenza A virus,Peptides,Protein Binding,Receptors Antigen T-Cell,Species Specificity,TCR repertoire,TCR-pMHC structure,αβ TCRs for CMV-NLV,αβ TCRs for IAV-GIL},
annotation = {76 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/HJ4VAYPH/Chen et al. - 2017 - Sequence and Structural Analyses Reveal Distinct a.pdf}
}
@inproceedings{chenSimpleFrameworkContrastive2020,
title = {A {{Simple Framework}} for {{Contrastive Learning}} of {{Visual Representations}}},
booktitle = {Proceedings of the 37th {{International Conference}} on {{Machine Learning}}},
author = {Chen, Ting and Kornblith, Simon and Norouzi, Mohammad and Hinton, Geoffrey},
date = {2020-11-21},
pages = {1597--1607},
publisher = {PMLR},
issn = {2640-3498},
url = {https://proceedings.mlr.press/v119/chen20j.html},
urldate = {2022-03-17},
abstract = {This paper presents SimCLR: a simple framework for contrastive learning of visual representations. We simplify recently proposed contrastive self-supervised learning algorithms without requiring specialized architectures or a memory bank. In order to understand what enables the contrastive prediction tasks to learn useful representations, we systematically study the major components of our framework. We show that (1) composition of data augmentations plays a critical role in defining effective predictive tasks, (2) introducing a learnable nonlinear transformation between the representation and the contrastive loss substantially improves the quality of the learned representations, and (3) contrastive learning benefits from larger batch sizes and more training steps compared to supervised learning. By combining these findings, we are able to considerably outperform previous methods for self-supervised and semi-supervised learning on ImageNet. A linear classifier trained on self-supervised representations learned by SimCLR achieves 76.5\% top-1 accuracy, which is a 7\% relative improvement over previous state-of-the-art, matching the performance of a supervised ResNet-50. When fine-tuned on only 1\% of the labels, we achieve 85.8\% top-5 accuracy, outperforming AlexNet with 100X fewer labels.},
eventtitle = {International {{Conference}} on {{Machine Learning}}},
langid = {english},
file = {/Users/Nasy/Zotero/storage/DVKM3MFQ/Chen et al. - 2020 - A Simple Framework for Contrastive Learning of Vis.pdf;/Users/Nasy/Zotero/storage/M8V8MP6Q/Chen et al. - 2020 - A Simple Framework for Contrastive Learning of Vis.pdf}
}
@online{chenSymbolicDiscoveryOptimization2023,
title = {Symbolic {{Discovery}} of {{Optimization Algorithms}}},
author = {Chen, Xiangning and Liang, Chen and Huang, Da and Real, Esteban and Wang, Kaiyuan and Liu, Yao and Pham, Hieu and Dong, Xuanyi and Luong, Thang and Hsieh, Cho-Jui and Lu, Yifeng and Le, Quoc V.},
date = {2023-05-08},
eprint = {2302.06675},
eprinttype = {arXiv},
doi = {10.48550/arXiv.2302.06675},
url = {http://arxiv.org/abs/2302.06675},
urldate = {2024-11-08},
abstract = {We present a method to formulate algorithm discovery as program search, and apply it to discover optimization algorithms for deep neural network training. We leverage efficient search techniques to explore an infinite and sparse program space. To bridge the large generalization gap between proxy and target tasks, we also introduce program selection and simplification strategies. Our method discovers a simple and effective optimization algorithm, Lion (EvoLved Sign Momentum). It is more memory-efficient than Adam as it only keeps track of the momentum. Different from adaptive optimizers, its update has the same magnitude for each parameter calculated through the sign operation. We compare Lion with widely used optimizers, such as Adam and Adafactor, for training a variety of models on different tasks. On image classification, Lion boosts the accuracy of ViT by up to 2\% on ImageNet and saves up to 5x the pre-training compute on JFT. On vision-language contrastive learning, we achieve 88.3\% zero-shot and 91.1\% fine-tuning accuracy on ImageNet, surpassing the previous best results by 2\% and 0.1\%, respectively. On diffusion models, Lion outperforms Adam by achieving a better FID score and reducing the training compute by up to 2.3x. For autoregressive, masked language modeling, and fine-tuning, Lion exhibits a similar or better performance compared to Adam. Our analysis of Lion reveals that its performance gain grows with the training batch size. It also requires a smaller learning rate than Adam due to the larger norm of the update produced by the sign function. Additionally, we examine the limitations of Lion and identify scenarios where its improvements are small or not statistically significant. Lion is also successfully deployed in production systems such as Google search ads CTR model.},
pubstate = {prepublished},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing},
file = {/Users/Nasy/Zotero/storage/9SM7KL8X/Chen et al. - 2023 - Symbolic Discovery of Optimization Algorithms.pdf;/Users/Nasy/Zotero/storage/CFKR3T2W/2302.html}
}
@inproceedings{chilimbiProjectAdamBuilding2014,
title = {Project {{Adam}}: {{Building}} an {{Efficient}} and {{Scalable Deep Learning Training System}}},
booktitle = {11th {{USENIX Symposium}} on {{Operating Systems Design}} and {{Implementation}} ({{OSDI}} 14)},
author = {Chilimbi, Trishul and Suzue, Yutaka and Apacible, Johnson and Kalyanaraman, Karthik},
date = {2014-10},
pages = {571--582},
publisher = {USENIX Association},
location = {Broomfield, CO},
url = {https://www.usenix.org/conference/osdi14/technical-sessions/presentation/chilimbi},
isbn = {978-1-931971-16-4},
keywords = {nosource}
}
@article{chiuSearchingYoungStellar2021,
title = {Searching for Young Stellar Objects through {{SEDs}} by Machine Learning},
author = {Chiu, Y.-L. and Ho, C.-T. and Wang, D.-W. and Lai, S.-P.},
date = {2021-07-01},
journaltitle = {Astronomy and Computing},
volume = {36},
pages = {100470},
issn = {2213-1337},
doi = {10.1016/j.ascom.2021.100470},
url = {https://www.sciencedirect.com/science/article/pii/S221313372100024X},
urldate = {2022-07-29},
abstract = {Accurate measurements of statistical properties, such as the star formation rate and the lifetime of young stellar objects (YSOs) in different stages, are essential for constraining star formation theories. However, it is a difficult task to separate galaxies and YSOs based on spectral energy distributions (SEDs) alone, because they contain both thermal emission from stars and dust around them and no reliable theories can be applied to distinguish them. Here we compare different machine learning algorithms and develop the Spectrum Classifier of Astronomical Objects (SCAO), based on Fully Connected Neural Network (FCN), to classify regular stars, galaxies, and YSOs. Superior to previous classifiers, SCAO is solely trained by high quality data labeled in Molecular Cores to Planet-forming Disks (c2d) catalog without a priori theoretical knowledge, and provides excellent results with high precision ({$>$}96\%) and recall ({$>$}98\%) for YSOs when only eight bands are included. We systematically investigate the effects of observation errors and distance effects, and show that high accuracy performance is still maintained even when using fluxes of only three bands (IRAC 3, IRAC 4, and MIPS 1) in the long wavelengths regime, because the silicate absorption feature is automatically detected by SCAO. Finally, we applied SCAO to Spitzer Enhanced Imaging Products (SEIP), the most complete catalog of Spitzer observations, and found 129219 YSO candidates. The website from SCAO is available at http://scao.astr.nthu.edu.tw.},
langid = {english},
keywords = {Deep learning,Neural networks,SED,YSO},
annotation = {3 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/EWC9MFJA/Chiu et al. - 2021 - Searching for young stellar objects through SEDs b.pdf;/Users/Nasy/Zotero/storage/BC92DAZD/S221313372100024X.html}
}
@online{choiEmpiricalComparisonsOptimizers2020,
title = {On {{Empirical Comparisons}} of {{Optimizers}} for {{Deep Learning}}},
author = {Choi, Dami and Shallue, Christopher J. and Nado, Zachary and Lee, Jaehoon and Maddison, Chris J. and Dahl, George E.},
date = {2020-06-15},
eprint = {1910.05446},
eprinttype = {arXiv},
eprintclass = {cs, stat},
doi = {10.48550/arXiv.1910.05446},
url = {http://arxiv.org/abs/1910.05446},
abstract = {Selecting an optimizer is a central step in the contemporary deep learning pipeline. In this paper, we demonstrate the sensitivity of optimizer comparisons to the hyperparameter tuning protocol. Our findings suggest that the hyperparameter search space may be the single most important factor explaining the rankings obtained by recent empirical comparisons in the literature. In fact, we show that these results can be contradicted when hyperparameter search spaces are changed. As tuning effort grows without bound, more general optimizers should never underperform the ones they can approximate (i.e., Adam should never perform worse than momentum), but recent attempts to compare optimizers either assume these inclusion relationships are not practically relevant or restrict the hyperparameters in ways that break the inclusions. In our experiments, we find that inclusion relationships between optimizers matter in practice and always predict optimizer comparisons. In particular, we find that the popular adaptive gradient methods never underperform momentum or gradient descent. We also report practical tips around tuning often ignored hyperparameters of adaptive gradient methods and raise concerns about fairly benchmarking optimizers for neural network training.},
pubstate = {prepublished},
keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
file = {/Users/Nasy/Zotero/storage/PZNQE4WA/Choi et al. - 2020 - On Empirical Comparisons of Optimizers for Deep Le.pdf;/Users/Nasy/Zotero/storage/W6T3WUTK/1910.html}
}
@inproceedings{chopraLearningSimilarityMetric2005,
title = {Learning a Similarity Metric Discriminatively, with Application to Face Verification},
booktitle = {2005 {{IEEE Computer Society Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}}'05)},
author = {Chopra, S. and Hadsell, R. and LeCun, Y.},
date = {2005-06},
volume = {1},
pages = {539-546 vol. 1},
issn = {1063-6919},
doi = {10.1109/CVPR.2005.202},
abstract = {We present a method for training a similarity metric from data. The method can be used for recognition or verification applications where the number of categories is very large and not known during training, and where the number of training samples for a single category is very small. The idea is to learn a function that maps input patterns into a target space such that the L/sub 1/ norm in the target space approximates the "semantic" distance in the input space. The method is applied to a face verification task. The learning process minimizes a discriminative loss function that drives the similarity metric to be small for pairs of faces from the same person, and large for pairs from different persons. The mapping from raw to the target space is a convolutional network whose architecture is designed for robustness to geometric distortions. The system is tested on the Purdue/AR face database which has a very high degree of variability in the pose, lighting, expression, position, and artificial occlusions such as dark glasses and obscuring scarves.},
eventtitle = {2005 {{IEEE Computer Society Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}}'05)},
keywords = {Artificial neural networks,Character generation,Drives,Face recognition,Glass,Robustness,Spatial databases,Support vector machine classification,Support vector machines,System testing},
annotation = {1219 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/SJNIFE7T/1467314.html}
}
@article{choudharySiGMaNetDeepLearning2022,
title = {{{SiGMa-Net}}: {{Deep}} Learning Network to Distinguish Binary Black Hole Signals from Short-Duration Noise Transients},
shorttitle = {{{SiGMa-Net}}},
author = {Choudhary, Sunil and More, Anupreeta and Suyamprakasam, Sudhagar and Bose, Sukanta},
date = {2022-02-17},
url = {https://arxiv.org/abs/2202.08671v1},
urldate = {2022-02-18},
abstract = {Blip glitches, a type of short-duration noise transient in the LIGO--Virgo data, are a nuisance for the binary black hole (BBH) searches. They affect the BBH search sensitivity significantly because their time-domain morphologies are very similar, and that creates difficulty in vetoing them. In this work, we construct a deep-learning neural network to efficiently distinguish BBH signals from blip glitches. We introduce sine-Gaussian projection (SGP) maps, which are projections of GW frequency-domain data snippets on a basis of sine-Gaussians defined by the quality factor and central frequency. We feed the SGP maps to our deep-learning neural network, which classifies the BBH signals and blips. Whereas the BBH signals are simulated, the blips used are taken from real data throughout our analysis. We show that our network significantly improves the identification of the BBH signals in comparison to the results obtained using traditional-$\chi^2$ and sine-Gaussian $\chi^2$. For example, our network improves the sensitivity by 75\% at a false-positive rate of $10^{-2}$ for BBHs with total mass in the range $[80,140]~M_\odot$ and SNR in the range $[3,8]$. Also, it correctly identifies 95\% of the real GW events in GWTC-3. The computation time for classification is a few minutes for thousands of SGP maps on a single core. With further optimisation in the next version of our algorithm, we expect a further reduction in the computational cost. Our proposed method can potentially improve the veto process in the LIGO--Virgo GW data analysis and conceivably support identifying GW signals in low-latency pipelines.},
langid = {english},
file = {/Users/Nasy/Zotero/storage/5P24WFYX/Choudhary et al. - 2022 - SiGMa-Net Deep learning network to distinguish bi.pdf;/Users/Nasy/Zotero/storage/KTEA2UK4/2202.html}
}
@article{chuangDebiasedContrastiveLearning2020,
title = {Debiased Contrastive Learning},
author = {Chuang, Ching-Yao and Robinson, Joshua and Lin, Yen-Chen and Torralba, Antonio and Jegelka, Stefanie},
date = {2020},
journaltitle = {Advances in Neural Information Processing Systems},
volume = {33},
file = {/Users/Nasy/Zotero/storage/ZEMMUQ5V/Chuang et al. - Debiased Contrastive Learning.pdf}
}
@article{chuaReducedOrderModelingArtificial2019,
title = {Reduced-{{Order Modeling}} with {{Artificial Neurons}} for {{Gravitational-Wave Inference}}},
author = {Chua, Alvin J. K. and Galley, Chad R. and Vallisneri, Michele},
date = {2019-05-28},
journaltitle = {Phys. Rev. Lett.},
volume = {122},
number = {21},
pages = {211101},
publisher = {American Physical Society},
doi = {10.1103/PhysRevLett.122.211101},
url = {https://link.aps.org/doi/10.1103/PhysRevLett.122.211101},
urldate = {2021-10-18},
abstract = {Gravitational-wave data analysis is rapidly absorbing techniques from deep learning, with a focus on convolutional networks and related methods that treat noisy time series as images. We pursue an alternative approach, in which waveforms are first represented as weighted sums over reduced bases (reduced-order modeling); we then train artificial neural networks to map gravitational-wave source parameters into basis coefficients. Statistical inference proceeds directly in coefficient space, where it is theoretically straightforward and computationally efficient. The neural networks also provide analytic waveform derivatives, which are useful for gradient-based sampling schemes. We demonstrate fast and accurate coefficient interpolation for the case of a four-dimensional binary-inspiral waveform family and discuss promising applications of our framework in parameter estimation.},
issue = {21},
annotation = {34 citations (Crossref) [2022-08-03]}
}
@online{cohenGaugeEquivariantConvolutional2019,
title = {Gauge {{Equivariant Convolutional Networks}} and the {{Icosahedral CNN}}},
author = {Cohen, Taco S. and Weiler, Maurice and Kicanaoglu, Berkay and Welling, Max},
date = {2019-05-13},
eprint = {1902.04615},
eprinttype = {arXiv},
eprintclass = {cs, stat},
url = {http://arxiv.org/abs/1902.04615},
urldate = {2022-11-28},
abstract = {The principle of equivariance to symmetry transformations enables a theoretically grounded approach to neural network architecture design. Equivariant networks have shown excellent performance and data efficiency on vision and medical imaging problems that exhibit symmetries. Here we show how this principle can be extended beyond global symmetries to local gauge transformations. This enables the development of a very general class of convolutional neural networks on manifolds that depend only on the intrinsic geometry, and which includes many popular methods from equivariant and geometric deep learning. We implement gauge equivariant CNNs for signals defined on the surface of the icosahedron, which provides a reasonable approximation of the sphere. By choosing to work with this very regular manifold, we are able to implement the gauge equivariant convolution using a single conv2d call, making it a highly scalable and practical alternative to Spherical CNNs. Using this method, we demonstrate substantial improvements over previous methods on the task of segmenting omnidirectional images and global climate patterns.},
pubstate = {prepublished},
file = {/Users/Nasy/Zotero/storage/DI8BL2RG/Cohen et al. - 2019 - Gauge Equivariant Convolutional Networks and the Icosahedral CNN.pdf}
}
@online{cohenGroupEquivariantConvolutional2016,
title = {Group {{Equivariant Convolutional Networks}}},
author = {Cohen, Taco S. and Welling, Max},
date = {2016-06-03},
eprint = {1602.07576},
eprinttype = {arXiv},
eprintclass = {cs, stat},
doi = {10.48550/arXiv.1602.07576},
url = {http://arxiv.org/abs/1602.07576},
urldate = {2022-11-10},
abstract = {We introduce Group equivariant Convolutional Neural Networks (G-CNNs), a natural generalization of convolutional neural networks that reduces sample complexity by exploiting symmetries. G-CNNs use G-convolutions, a new type of layer that enjoys a substantially higher degree of weight sharing than regular convolution layers. G-convolutions increase the expressive capacity of the network without increasing the number of parameters. Group convolution layers are easy to use and can be implemented with negligible computational overhead for discrete groups generated by translations, reflections and rotations. G-CNNs achieve state of the art results on CIFAR10 and rotated MNIST.},
pubstate = {prepublished},
keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
file = {/Users/Nasy/Zotero/storage/LYIR6M49/Cohen and Welling - 2016 - Group Equivariant Convolutional Networks.pdf;/Users/Nasy/Zotero/storage/7TYKAYT5/1602.html}
}
@online{cohenSphericalCNNs2018,
title = {Spherical {{CNNs}}},
author = {Cohen, Taco S. and Geiger, Mario and Koehler, Jonas and Welling, Max},
date = {2018-02-25},
eprint = {1801.10130},
eprinttype = {arXiv},
eprintclass = {cs, stat},
doi = {10.48550/arXiv.1801.10130},
url = {http://arxiv.org/abs/1801.10130},
urldate = {2022-11-10},
abstract = {Convolutional Neural Networks (CNNs) have become the method of choice for learning problems involving 2D planar images. However, a number of problems of recent interest have created a demand for models that can analyze spherical images. Examples include omnidirectional vision for drones, robots, and autonomous cars, molecular regression problems, and global weather and climate modelling. A naive application of convolutional networks to a planar projection of the spherical signal is destined to fail, because the space-varying distortions introduced by such a projection will make translational weight sharing ineffective. In this paper we introduce the building blocks for constructing spherical CNNs. We propose a definition for the spherical cross-correlation that is both expressive and rotation-equivariant. The spherical correlation satisfies a generalized Fourier theorem, which allows us to compute it efficiently using a generalized (non-commutative) Fast Fourier Transform (FFT) algorithm. We demonstrate the computational efficiency, numerical accuracy, and effectiveness of spherical CNNs applied to 3D model recognition and atomization energy regression.},
pubstate = {prepublished},
keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
file = {/Users/Nasy/Zotero/storage/CIYKRBRY/Cohen et al. - 2018 - Spherical CNNs.pdf;/Users/Nasy/Zotero/storage/Z2L4ML98/1801.html}
}
@article{coleTcellReceptorTCRPeptide2014,
title = {T-Cell {{Receptor}} ({{TCR}})-{{Peptide Specificity Overrides Affinity-enhancing TCR-Major Histocompatibility Complex Interactions}}},
author = {Cole, David K. and Miles, Kim M. and Madura, Florian and Holland, Christopher J. and Schauenburg, Andrea J. A. and Godkin, Andrew J. and Bulek, Anna M. and Fuller, Anna and Akpovwa, Hephzibah J. E. and Pymm, Phillip G. and Liddy, Nathaniel and Sami, Malkit and Li, Yi and Rizkallah, Pierre J. and Jakobsen, Bent K. and Sewell, Andrew K.},
date = {2014-01-10},
journaltitle = {J Biol Chem},
volume = {289},
number = {2},
eprint = {24196962},
eprinttype = {pmid},
pages = {628--638},
issn = {0021-9258},
doi = {10.1074/jbc.M113.522110},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3887192/},
urldate = {2022-03-19},
abstract = {Background: TCR recognition of bipartite ligands composed of self (MHC) and non-self (peptide) maintains T-cell specificity., Results: Mutation of residues in the cognate peptide override TCR mutations that enhance MHC binding., Conclusion: TCR-pMHC binding affinity requires specific TCR-peptide interactions., Significance: Stabilization of TCR-pMHC engagement by TCR-peptide interactions maintains T-cell specificity and prevents recognition of self-pMHC in the periphery., αβ T-cell receptors (TCRs) engage antigens using complementarity-determining region (CDR) loops that are either germ line-encoded (CDR1 and CDR2) or somatically rearranged (CDR3). TCR ligands compose a presentation platform (major histocompatibility complex (MHC)) and a variable antigenic component consisting of a short “foreign” peptide. The sequence of events when the TCR engages its peptide-MHC (pMHC) ligand remains unclear. Some studies suggest that the germ line elements of the TCR engage the MHC prior to peptide scanning, but this order of binding is difficult to reconcile with some TCR-pMHC structures. Here, we used TCRs that exhibited enhanced pMHC binding as a result of mutations in either CDR2 and/or CDR3 loops, that bound to the MHC or peptide, respectively, to dissect the roles of these loops in stabilizing TCR-pMHC interactions. Our data show that TCR-peptide interactions play a strongly dominant energetic role providing a binding mode that is both temporally and energetically complementary with a system requiring positive selection by self-pMHC in the thymus and rapid recognition of non-self-pMHC in the periphery.},
pmcid = {PMC3887192},
annotation = {42 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/BW97KZJS/Cole et al. - 2014 - T-cell Receptor (TCR)-Peptide Specificity Override.pdf}
}
@article{croceDeepLearningPredictions2024,
title = {Deep Learning Predictions of {{TCR-epitope}} Interactions Reveal Epitope-Specific Chains in Dual Alpha {{T}} Cells},
author = {Croce, Giancarlo and Bobisse, Sara and Moreno, Dana Léa and Schmidt, Julien and Guillaume, Philippe and Harari, Alexandre and Gfeller, David},
date = {2024-04-13},
journaltitle = {Nat Commun},
volume = {15},
number = {1},
eprint = {38615042},
eprinttype = {pmid},
pages = {3211},
issn = {2041-1723},
doi = {10.1038/s41467-024-47461-8},
abstract = {T cells have the ability to eliminate infected and cancer cells and play an essential role in cancer immunotherapy. T cell activation is elicited by the binding of the T cell receptor (TCR) to epitopes displayed on MHC molecules, and the TCR specificity is determined by the sequence of its α and β chains. Here, we collect and curate a dataset of 17,715 αβTCRs interacting with dozens of class I and class II epitopes. We use this curated data to develop MixTCRpred, an epitope-specific TCR-epitope interaction predictor. MixTCRpred accurately predicts TCRs recognizing several viral and cancer epitopes. MixTCRpred further provides a useful quality control tool for multiplexed single-cell TCR sequencing assays of epitope-specific T cells and pinpoints a substantial fraction of putative contaminants in public databases. Analysis of epitope-specific dual α T cells demonstrates that MixTCRpred can identify α chains mediating epitope recognition. Applying MixTCRpred to TCR repertoires from COVID-19 patients reveals enrichment of clonotypes predicted to bind an immunodominant SARS-CoV-2 epitope. Overall, MixTCRpred provides a robust tool to predict TCRs interacting with specific epitopes and interpret TCR-sequencing data from both bulk and epitope-specific T cells.},
langid = {english},
pmcid = {PMC11016097},
keywords = {COVID-19,Deep Learning,Epitopes,Humans,Immunodominant Epitopes,T-Lymphocytes},
file = {/Users/Nasy/Zotero/storage/CRWTZQTT/Croce et al. - 2024 - Deep learning predictions of TCR-epitope interactions reveal epitope-specific chains in dual alpha T.pdf}
}
@article{cuocoEnhancingGravitationalWaveScience2020,
title = {Enhancing {{Gravitational-Wave Science}} with {{Machine Learning}}},
author = {Cuoco, Elena and Powell, Jade and Cavaglià, Marco and Ackley, Kendall and Bejger, Michal and Chatterjee, Chayan and Coughlin, Michael and Coughlin, Scott and Easter, Paul and Essick, Reed and Gabbard, Hunter and Gebhard, Timothy and Ghosh, Shaon and Haegel, Leila and Iess, Alberto and Keitel, David and Marka, Zsuzsa and Marka, Szabolcs and Morawski, Filip and Nguyen, Tri and Ormiston, Rich and Puerrer, Michael and Razzano, Massimiliano and Staats, Kai and Vajente, Gabriele and Williams, Daniel},
date = {2020-12-04},
journaltitle = {Mach. Learn.: Sci. Technol.},
volume = {2},
number = {1},
eprint = {2005.03745},
eprinttype = {arXiv},
pages = {011002},
issn = {2632-2153},
doi = {10.1088/2632-2153/abb93a},
url = {http://arxiv.org/abs/2005.03745},
urldate = {2021-03-23},
abstract = {Machine learning has emerged as a popular and powerful approach for solving problems in astrophysics. We review applications of machine learning techniques for the analysis of ground-based gravitational-wave detector data. Examples include techniques for improving the sensitivity of Advanced LIGO and Advanced Virgo gravitational-wave searches, methods for fast measurements of the astrophysical parameters of gravitational-wave sources, and algorithms for reduction and characterization of non-astrophysical detector noise. These applications demonstrate how machine learning techniques may be harnessed to enhance the science that is possible with current and future gravitational-wave detectors.},
issue = {1},
langid = {english},
keywords = {Astrophysics - High Energy Astrophysical Phenomena,General Relativity and Quantum Cosmology},
annotation = {48 citations (Crossref) [2022-08-03]},
file = {/Users/Nasy/Zotero/storage/87DNKR3M/Cuoco et al. - 2020 - Enhancing Gravitational-Wave Science with Machine .pdf}
}
@online{daiWhyCanGPT2022,
title = {Why {{Can GPT Learn In-Context}}? {{Language Models Secretly Perform Gradient Descent}} as {{Meta-Optimizers}}},
shorttitle = {Why {{Can GPT Learn In-Context}}?},
author = {Dai, Damai and Sun, Yutao and Dong, Li and Hao, Yaru and Sui, Zhifang and Wei, Furu},
date = {2022-12-21},
eprint = {2212.10559},
eprinttype = {arXiv},
eprintclass = {cs},
doi = {10.48550/arXiv.2212.10559},
url = {http://arxiv.org/abs/2212.10559},
abstract = {Large pretrained language models have shown surprising In-Context Learning (ICL) ability. With a few demonstration input-label pairs, they can predict the label for an unseen input without additional parameter updates. Despite the great success in performance, the working mechanism of ICL still remains an open problem. In order to better understand how ICL works, this paper explains language models as meta-optimizers and understands ICL as a kind of implicit finetuning. Theoretically, we figure out that the Transformer attention has a dual form of gradient descent based optimization. On top of it, we understand ICL as follows: GPT first produces meta-gradients according to the demonstration examples, and then these meta-gradients are applied to the original GPT to build an ICL model. Experimentally, we comprehensively compare the behavior of ICL and explicit finetuning based on real tasks to provide empirical evidence that supports our understanding. The results prove that ICL behaves similarly to explicit finetuning at the prediction level, the representation level, and the attention behavior level. Further, inspired by our understanding of meta-optimization, we design a momentum-based attention by analogy with the momentum-based gradient descent algorithm. Its consistently better performance over vanilla attention supports our understanding again from another aspect, and more importantly, it shows the potential to utilize our understanding for future model designing.},
pubstate = {prepublished},
keywords = {Computer Science - Computation and Language},
file = {/Users/Nasy/Zotero/storage/DUKE28US/Dai et al. - 2022 - Why Can GPT Learn In-Context Language Models Secr.pdf;/Users/Nasy/Zotero/storage/2RZNTM66/2212.html}
}
@inproceedings{dallamicoRevisitingBetheHessianImproved2019,
title = {Revisiting the {{Bethe-Hessian}}: {{Improved Community Detection}} in {{Sparse Heterogeneous Graphs}}},
shorttitle = {Revisiting the {{Bethe-Hessian}}},
booktitle = {Advances in {{Neural Information Processing Systems}}},
author = {Dall'Amico, Lorenzo and Couillet, Romain and Tremblay, Nicolas},
date = {2019},
volume = {32},
publisher = {Curran Associates, Inc.},
url = {https://proceedings.neurips.cc/paper/2019/hash/3e6260b81898beacda3d16db379ed329-Abstract.html},
urldate = {2022-05-26},
abstract = {Spectral clustering is one of the most popular, yet still incompletely understood, methods for community detection on graphs. This article studies spectral clustering based on the Bethe-Hessian matrix $H_r = (r^2 - 1)I_n + D - rA$ for sparse heterogeneous graphs (following the degree-corrected stochastic block model) in a two-class setting. For a specific value $r = \zeta$, clustering is shown to be insensitive to the degree heterogeneity. We then study the behavior of the informative eigenvector of $H_\zeta$ and, as a result, predict the clustering accuracy. The article concludes with an overview of the generalization to more than two classes along with extensive simulations on synthetic and real networks corroborating our findings.},
file = {/Users/Nasy/Zotero/storage/6NXVV8I4/Dall' Amico et al. - 2019 - Revisiting the Bethe-Hessian Improved Community D.pdf}
}
@article{dashQuantifiablePredictiveFeatures2017,
title = {Quantifiable Predictive Features Define Epitope-Specific {{T}} Cell Receptor Repertoires},
author = {Dash, Pradyot and Fiore-Gartland, Andrew J. and Hertz, Tomer and Wang, George C. and Sharma, Shalini and Souquette, Aisha and Crawford, Jeremy Chase and Clemens, E. Bridie and Nguyen, Thi H. O. and Kedzierska, Katherine and La Gruta, Nicole L. and Bradley, Philip and Thomas, Paul G.},
date = {2017-07},
journaltitle = {Nature},
volume = {547},
number = {7661},
pages = {89--93},
publisher = {Nature Publishing Group},
issn = {1476-4687},
doi = {10.1038/nature22383},
url = {https://www.nature.com/articles/nature22383},
abstract = {The authors characterize epitope-specific T cell repertoires, identify shared and recognizable features of TCRs, and develop tools to classify antigen specificity on the basis of sequence analysis.},
issue = {7661},
langid = {english},
keywords = {Bioinformatics,VDJ recombination},
annotation = {444 citations (Crossref) [2022-09-16]},
file = {/Users/Nasy/Zotero/storage/68GPMTI9/Dash et al. - 2017 - Quantifiable predictive features define epitope-sp.pdf;/Users/Nasy/Zotero/storage/DLBIDI9H/nature22383.html}
}
@inproceedings{deanLargeScaleDistributed2012,
title = {Large {{Scale Distributed Deep Networks}}},
booktitle = {Advances in {{Neural Information Processing Systems}}},
author = {Dean, Jeffrey and Corrado, Greg and Monga, Rajat and Chen, Kai and Devin, Matthieu and Mao, Mark and Ranzato, Marc'Aurelio and Senior, Andrew and Tucker, Paul and Yang, Ke and Le, Quoc and Ng, Andrew},
editor = {Pereira, F. and Burges, C. J. C. and Bottou, L. and Weinberger, K. Q.},
date = {2012},
volume = {25},
publisher = {Curran Associates, Inc.},
url = {https://proceedings.neurips.cc/paper/2012/file/6aca97005c68f1206823815f66102863-Paper.pdf},
keywords = {nosource}
}
@software{DeepLearningTuning2023,
title = {Deep {{Learning Tuning Playbook}}},
date = {2023-03-24T03:56:33Z},
origdate = {2023-01-18T23:32:32Z},
url = {https://github.com/google-research/tuning_playbook},
abstract = {A playbook for systematically maximizing the performance of deep learning models.},
organization = {Google Research}
}
@online{defazioRoadLessScheduled2024,
title = {The {{Road Less Scheduled}}},
author = {Defazio, Aaron and Yang, Xingyu Alice and Mehta, Harsh and Mishchenko, Konstantin and Khaled, Ahmed and Cutkosky, Ashok},
date = {2024-10-29},
eprint = {2405.15682},
eprinttype = {arXiv},
doi = {10.48550/arXiv.2405.15682},
url = {http://arxiv.org/abs/2405.15682},
urldate = {2024-11-08},
abstract = {Existing learning rate schedules that do not require specification of the optimization stopping step T are greatly out-performed by learning rate schedules that depend on T. We propose an approach that avoids the need for this stopping time by eschewing the use of schedules entirely, while exhibiting state-of-the-art performance compared to schedules across a wide family of problems ranging from convex problems to large-scale deep learning problems. Our Schedule-Free approach introduces no additional hyper-parameters over standard optimizers with momentum. Our method is a direct consequence of a new theory we develop that unifies scheduling and iterate averaging. An open source implementation of our method is available at https://github.com/facebookresearch/schedule\_free. Schedule-Free AdamW is the core algorithm behind our winning entry to the MLCommons 2024 AlgoPerf Algorithmic Efficiency Challenge Self-Tuning track.},
pubstate = {prepublished},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Mathematics - Optimization and Control,Statistics - Machine Learning},
file = {/Users/Nasy/Zotero/storage/VSWEFGFH/Defazio et al. - 2024 - The Road Less Scheduled.pdf;/Users/Nasy/Zotero/storage/D423FLQB/2405.html}
}
@article{deneuterFeasibilityMiningCD82018,
title = {On the Feasibility of Mining {{CD8}}+ {{T}} Cell Receptor Patterns Underlying Immunogenic Peptide Recognition},
author = {De Neuter, Nicolas and Bittremieux, Wout and Beirnaert, Charlie and Cuypers, Bart and Mrzic, Aida and Moris, Pieter and Suls, Arvid and Van Tendeloo, Viggo and Ogunjimi, Benson and Laukens, Kris and Meysman, Pieter},
date = {2018-03-01},
journaltitle = {Immunogenetics},
volume = {70},
number = {3},
pages = {159--168},
issn = {1432-1211},
doi = {10.1007/s00251-017-1023-5},
url = {https://doi.org/10.1007/s00251-017-1023-5},
abstract = {Current T cell epitope prediction tools are a valuable resource in designing targeted immunogenicity experiments. They typically focus on, and are able to, accurately predict peptide binding and presentation by major histocompatibility complex (MHC) molecules on the surface of antigen-presenting cells. However, recognition of the peptide-MHC complex by a T cell receptor (TCR) is often not included in these tools. We developed a classification approach based on random forest classifiers to predict recognition of a peptide by a T cell receptor and discover patterns that contribute to recognition. We considered two approaches to solve this problem: (1) distinguishing between two sets of TCRs that each bind to a known peptide and (2) retrieving TCRs that bind to a given peptide from a large pool of TCRs. Evaluation of the models on two HIV-1, B*08-restricted epitopes reveals good performance and hints towards structural CDR3 features that can determine peptide immunogenicity. These results are of particular importance as they show that prediction of T cell epitope and T cell epitope recognition based on sequence data is a feasible approach. In addition, the validity of our models not only serves as a proof of concept for the prediction of immunogenic T cell epitopes but also paves the way for more general and high-performing models.},
langid = {english},
keywords = {Bioinformatics,Immunoinformatics,Random forest classifier,T cell epitope prediction,T cell receptor},
annotation = {37 citations (Crossref) [2022-09-16]},
file = {/Users/Nasy/Zotero/storage/QGX6ZLWI/De Neuter et al. - 2018 - On the feasibility of mining CD8+ T cell receptor .pdf}
}