ref.bib

@online{10xgenomics10X2023,
  title = {{{10X}}},
  author = {10X Genomics},
  date = {2023},
  url = {https://www.10xgenomics.com/},
  abstract = {Resolving Biology to Advance Human Health},
  langid = {english},
  organization = {10x Genomics},
  file = {/Users/Nasy/Zotero/storage/S3JUZCYN/www.10xgenomics.com.html}
}

@article{abramsonAccurateStructurePrediction2024,
  title = {Accurate Structure Prediction of Biomolecular Interactions with {{AlphaFold}} 3},
  author = {Abramson, Josh and Adler, Jonas and Dunger, Jack and Evans, Richard and Green, Tim and Pritzel, Alexander and Ronneberger, Olaf and Willmore, Lindsay and Ballard, Andrew J. and Bambrick, Joshua and Bodenstein, Sebastian W. and Evans, David A. and Hung, Chia-Chun and O’Neill, Michael and Reiman, David and Tunyasuvunakool, Kathryn and Wu, Zachary and Žemgulytė, Akvilė and Arvaniti, Eirini and Beattie, Charles and Bertolli, Ottavia and Bridgland, Alex and Cherepanov, Alexey and Congreve, Miles and Cowen-Rivers, Alexander I. and Cowie, Andrew and Figurnov, Michael and Fuchs, Fabian B. and Gladman, Hannah and Jain, Rishub and Khan, Yousuf A. and Low, Caroline M. R. and Perlin, Kuba and Potapenko, Anna and Savy, Pascal and Singh, Sukhdeep and Stecula, Adrian and Thillaisundaram, Ashok and Tong, Catherine and Yakneen, Sergei and Zhong, Ellen D. and Zielinski, Michal and Žídek, Augustin and Bapst, Victor and Kohli, Pushmeet and Jaderberg, Max and Hassabis, Demis and Jumper, John M.},
  date = {2024-06},
  journaltitle = {Nature},
  volume = {630},
  number = {8016},
  pages = {493--500},
  publisher = {Nature Publishing Group},
  issn = {1476-4687},
  doi = {10.1038/s41586-024-07487-w},
  url = {https://www.nature.com/articles/s41586-024-07487-w},
  urldate = {2024-10-30},
  abstract = {The introduction of AlphaFold\,21 has spurred a revolution in modelling the structure of proteins and their interactions, enabling a huge range of applications in protein modelling and design2–6. Here we describe our AlphaFold\,3 model with a substantially updated diffusion-based architecture that is capable of predicting the joint structure of complexes including proteins, nucleic acids, small molecules, ions and modified residues. The new AlphaFold model demonstrates substantially improved accuracy over many previous specialized tools: far greater accuracy for protein–ligand interactions compared with state-of-the-art docking tools, much higher accuracy for protein–nucleic acid interactions compared with nucleic-acid-specific predictors and substantially higher antibody–antigen prediction accuracy compared with AlphaFold-Multimer v.2.37,8. Together, these results show that high-accuracy modelling across biomolecular space is possible within a single unified deep-learning framework.},
  langid = {english},
  keywords = {Drug discovery,Machine learning,Protein structure predictions,Structural biology},
  file = {/Users/Nasy/Zotero/storage/ACEATQ5V/Abramson et al. - 2024 - Accurate structure prediction of biomolecular interactions with AlphaFold 3.pdf}
}

@online{AccuratePredictionProtein,
  title = {Accurate Prediction of Protein Structures and Interactions Using a Three-Track Neural Network},
  url = {https://www.science.org/doi/10.1126/science.abj8754},
  urldate = {2022-06-06},
  file = {/Users/Nasy/Zotero/storage/Z7HR8Q7A/science.html}
}

@inproceedings{akibaOptunaNextgenerationHyperparameter2019,
  title = {Optuna: {{A Next-generation Hyperparameter Optimization Framework}}},
  shorttitle = {Optuna},
  booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
  date = {2019-07-25},
  series = {{{KDD}} '19},
  pages = {2623--2631},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3292500.3330701},
  url = {https://doi.org/10.1145/3292500.3330701},
  urldate = {2023-08-15},
  abstract = {The purpose of this study is to introduce new design-criteria for next-generation hyperparameter optimization software. The criteria we propose include (1) define-by-run API that allows users to construct the parameter search space dynamically, (2) efficient implementation of both searching and pruning strategies, and (3) easy-to-setup, versatile architecture that can be deployed for various purposes, ranging from scalable distributed computing to light-weight experiment conducted via interactive interface. In order to prove our point, we will introduce Optuna, an optimization software which is a culmination of our effort in the development of a next generation optimization software. As an optimization software designed with define-by-run principle, Optuna is particularly the first of its kind. We will present the design-techniques that became necessary in the development of the software that meets the above criteria, and demonstrate the power of our new design through experimental results and real world applications. Our software is available under the MIT license (https://github.com/pfnet/optuna/).},
  isbn = {978-1-4503-6201-6},
  keywords = {Bayesian optimization,black-box optimization,hyperparameter optimization,machine learning system},
  file = {/Users/Nasy/Zotero/storage/7FLDVKJX/Akiba et al. - 2019 - Optuna A Next-generation Hyperparameter Optimization Framework.pdf}
}

@inproceedings{akyurekWhatLearningAlgorithm2023,
  title = {\hspace{0pt}\hspace{0pt}{{What}} Learning Algorithm Is In-Context Learning? {{Investigations}} with Linear Models},
  shorttitle = {\hspace{0pt}\hspace{0pt}{{What}} Learning Algorithm Is In-Context Learning?},
  author = {Akyürek, Ekin and Schuurmans, Dale and Andreas, Jacob and Ma, Tengyu and Zhou, Denny},
  date = {2023-02-01},
  url = {https://openreview.net/forum?id=0g0X4H8yN4I},
  abstract = {Neural sequence models, especially transformers, exhibit a remarkable capacity for in-context learning. They can construct new predictors from sequences of labeled examples \$(x, f(x))\$ presented in the input without further parameter updates. We investigate the hypothesis that transformer-based in-context learners implement standard learning algorithms implicitly, by encoding context-specific parametric models in their hidden representations, and updating these implicit models as new examples appear in the context. Using linear regression as a model problem, we offer three sources of evidence for this hypothesis. First, we prove by construction that transformers can implement learning algorithms for linear models based on gradient descent and closed-form computation of regression parameters. Second, we show that trained in-context learners closely match the predictors computed by gradient descent, ridge regression, and exact least-squares regression, transitioning between different predictors as transformer depth and dataset noise vary. Third, we present preliminary evidence that in-context learners share algorithmic features with these predictors: learners' late layers encode weight vectors and moment matrices. These results suggest that in-context learning is understandable in algorithmic terms, and that (at least in the linear case) learners may work by rediscovering standard estimation algorithms.},
  eventtitle = {The {{Eleventh International Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/ZWQ2N4G3/Akyürek et al. - 2023 - What learning algorithm is in-context learning .pdf}
}

@article{anonymousRethinkingRoleDemonstrations2022,
  title = {Rethinking the {{Role}} of {{Demonstrations}}: {{What Makes In-Context Learning Work}}?},
  shorttitle = {Rethinking the {{Role}} of {{Demonstrations}}},
  author = {Anonymous},
  date = {2022-04-21},
  url = {https://openreview.net/forum?id=cnRGMv-Ak7u},
  urldate = {2023-04-09},
  abstract = {Large language models (LMs) are able to in-context learn -- perform a new task via inference alone by conditioning on a few input-label pairs (demonstrations) and making predictions for new inputs. However, there has been little understanding of how the model learns and which aspects of the demonstrations contribute to end task performance. In this paper, we show that ground truth demonstrations are in fact not required -- randomly replacing labels in the demonstrations barely hurts performance, consistently over 12 different models including GPT-3. Instead, we find that other aspects of the demonstrations are the key drivers of end task performance, including the fact that they provide a few examples of (1) the label space, (2) the distribution of the input text, and (3) the overall format of the sequence. Together, our analysis provides a new way of understanding how and why in-context learning works, while opening up new questions about how much can be learned from large language models through inference alone.},
  langid = {english},
  keywords = {⛔ No DOI found},
  file = {/Users/Nasy/Zotero/storage/TW9Q2VUA/Anonymous - 2022 - Rethinking the Role of Demonstrations What Makes .pdf}
}

@online{AntigenSpecificTCRSignatures,
  title = {Antigen-{{Specific TCR Signatures}} of {{Cytomegalovirus Infection}} | {{The Journal}} of {{Immunology}}},
  url = {https://www.jimmunol.org/content/202/3/979},
  urldate = {2022-03-19},
  file = {/Users/Nasy/Zotero/storage/VZCL8MY4/979.html}
}

@article{antunesInterpretingTCellCrossreactivity2017,
  title = {Interpreting {{T-Cell Cross-reactivity}} through {{Structure}}: {{Implications}} for {{TCR-Based Cancer Immunotherapy}}},
  shorttitle = {Interpreting {{T-Cell Cross-reactivity}} through {{Structure}}},
  author = {Antunes, Dinler A. and Rigo, Maurício M. and Freitas, Martiela V. and Mendes, Marcus F. A. and Sinigaglia, Marialva and Lizée, Gregory and Kavraki, Lydia E. and Selin, Liisa K. and Cornberg, Markus and Vieira, Gustavo F.},
  date = {2017},
  journaltitle = {Frontiers in Immunology},
  volume = {8},
  issn = {1664-3224},
  doi = {10.3389/fimmu.2017.01210},
  url = {https://www.frontiersin.org/articles/10.3389/fimmu.2017.01210},
  urldate = {2022-08-19},
  abstract = {Immunotherapy has become one of the most promising avenues for cancer treatment, making use of the patient’s own immune system to eliminate cancer cells. Clinical trials with T-cell-based immunotherapies have shown dramatic tumor regressions, being effective in multiple cancer types and for many different patients. Unfortunately, this progress was tempered by reports of serious (even fatal) side effects. Such therapies rely on the use of cytotoxic T-cell lymphocytes, an essential part of the adaptive immune system. Cytotoxic T-cells are regularly involved in surveillance and are capable of both eliminating diseased cells and generating protective immunological memory. The specificity of a given T-cell is determined through the structural interaction between the T-cell receptor (TCR) and a peptide-loaded major histocompatibility complex (MHC); i.e., an intracellular peptide–ligand displayed at the cell surface by an MHC molecule. However, a given TCR can recognize different peptide–MHC (pMHC) complexes, which can sometimes trigger an unwanted response that is referred to as T-cell cross-reactivity. This has become a major safety issue in TCR-based immunotherapies, following reports of melanoma-specific T-cells causing cytotoxic damage to healthy tissues (e.g., heart and nervous system). T-cell cross-reactivity has been extensively studied in the context of viral immunology and tissue transplantation. Growing evidence suggests that it is largely driven by structural similarities of seemingly unrelated pMHC complexes. Here, we review recent reports about the existence of pMHC “hot-spots” for cross-reactivity and propose the existence of a TCR interaction profile (i.e., a refinement of a more general TCR footprint in which some amino acid residues are more important than others in triggering T-cell cross-reactivity). We also make use of available structural data and pMHC models to interpret previously reported cross-reactivity patterns among virus-derived peptides. Our study provides further evidence that structural analyses of pMHC complexes can be used to assess the intrinsic likelihood of cross-reactivity among peptide-targets. Furthermore, we hypothesize that some apparent inconsistencies in reported cross-reactivities, such as a preferential directionality, might also be driven by particular structural features of the targeted pMHC complex. Finally, we explain why TCR-based immunotherapy provides a special context in which meaningful T-cell cross-reactivity predictions can be made.},
  file = {/Users/Nasy/Zotero/storage/M6NYJUEH/Antunes et al. - 2017 - Interpreting T-Cell Cross-reactivity through Struc.pdf}
}

@article{antunesInterpretingTCellCrossreactivity2017a,
  title = {Interpreting {{T-Cell Cross-reactivity}} through {{Structure}}: {{Implications}} for {{TCR-Based Cancer Immunotherapy}}},
  shorttitle = {Interpreting {{T-Cell Cross-reactivity}} through {{Structure}}},
  author = {Antunes, Dinler A. and Rigo, Maurício M. and Freitas, Martiela V. and Mendes, Marcus F. A. and Sinigaglia, Marialva and Lizée, Gregory and Kavraki, Lydia E. and Selin, Liisa K. and Cornberg, Markus and Vieira, Gustavo F.},
  date = {2017},
  journaltitle = {Frontiers in Immunology},
  volume = {8},
  issn = {1664-3224},
  url = {https://www.frontiersin.org/articles/10.3389/fimmu.2017.01210},
  urldate = {2023-08-15},
  abstract = {Immunotherapy has become one of the most promising avenues for cancer treatment, making use of the patient’s own immune system to eliminate cancer cells. Clinical trials with T-cell-based immunotherapies have shown dramatic tumor regressions, being effective in multiple cancer types and for many different patients. Unfortunately, this progress was tempered by reports of serious (even fatal) side effects. Such therapies rely on the use of cytotoxic T-cell lymphocytes, an essential part of the adaptive immune system. Cytotoxic T-cells are regularly involved in surveillance and are capable of both eliminating diseased cells and generating protective immunological memory. The specificity of a given T-cell is determined through the structural interaction between the T-cell receptor (TCR) and a peptide-loaded major histocompatibility complex (MHC); i.e., an intracellular peptide–ligand displayed at the cell surface by an MHC molecule. However, a given TCR can recognize different peptide–MHC (pMHC) complexes, which can sometimes trigger an unwanted response that is referred to as T-cell cross-reactivity. This has become a major safety issue in TCR-based immunotherapies, following reports of melanoma-specific T-cells causing cytotoxic damage to healthy tissues (e.g., heart and nervous system). T-cell cross-reactivity has been extensively studied in the context of viral immunology and tissue transplantation. Growing evidence suggests that it is largely driven by structural similarities of seemingly unrelated pMHC complexes. Here, we review recent reports about the existence of pMHC “hot-spots” for cross-reactivity and propose the existence of a TCR interaction profile (i.e., a refinement of a more general TCR footprint in which some amino acid residues are more important than others in triggering T-cell cross-reactivity). We also make use of available structural data and pMHC models to interpret previously reported cross-reactivity patterns among virus-derived peptides. Our study provides further evidence that structural analyses of pMHC complexes can be used to assess the intrinsic likelihood of cross-reactivity among peptide-targets. Furthermore, we hypothesize that some apparent inconsistencies in reported cross-reactivities, such as a preferential directionality, might also be driven by particular structural features of the targeted pMHC complex. Finally, we explain why TCR-based immunotherapy provides a special context in which meaningful T-cell cross-reactivity predictions can be made.},
  file = {/Users/Nasy/Zotero/storage/XZ3CQGJP/Antunes et al. - 2017 - Interpreting T-Cell Cross-reactivity through Structure Implications for TCR-Based Cancer Immunother.pdf}
}

@article{arnaudSensitiveIdentificationNeoantigens2022,
  title = {Sensitive Identification of Neoantigens and Cognate {{TCRs}} in Human Solid Tumors},
  author = {Arnaud, Marion and Chiffelle, Johanna and Genolet, Raphael and Navarro Rodrigo, Blanca and Perez, Marta A. S. and Huber, Florian and Magnin, Morgane and Nguyen-Ngoc, Tu and Guillaume, Philippe and Baumgaertner, Petra and Chong, Chloe and Stevenson, Brian J. and Gfeller, David and Irving, Melita and Speiser, Daniel E. and Schmidt, Julien and Zoete, Vincent and Kandalaft, Lana E. and Bassani-Sternberg, Michal and Bobisse, Sara and Coukos, George and Harari, Alexandre},
  date = {2022-05},
  journaltitle = {Nat Biotechnol},
  volume = {40},
  number = {5},
  pages = {656--660},
  publisher = {Nature Publishing Group},
  issn = {1546-1696},
  doi = {10.1038/s41587-021-01072-6},
  url = {https://www.nature.com/articles/s41587-021-01072-6},
  urldate = {2023-08-18},
  abstract = {The identification of patient-specific tumor antigens is complicated by the low frequency of T cells specific for each tumor antigen. Here we describe NeoScreen, a method that enables the sensitive identification of rare tumor (neo)antigens and of cognate T cell receptors (TCRs) expressed by tumor-infiltrating lymphocytes. T cells transduced with tumor antigen-specific TCRs identified by NeoScreen mediate regression of established tumors in patient-derived xenograft mice.},
  issue = {5},
  langid = {english},
  keywords = {Cancer immunotherapy,T-cell receptor,Translational research},
  file = {/Users/Nasy/Zotero/storage/S6R6BHZ3/Arnaud et al. - 2022 - Sensitive identification of neoantigens and cognate TCRs in human solid tumors.pdf}
}

@article{atchleySolvingProteinSequence2005,
  title = {Solving the Protein Sequence Metric Problem},
  author = {Atchley, William R. and Zhao, Jieping and Fernandes, Andrew D. and Drüke, Tanja},
  date = {2005-05-03},
  journaltitle = {Proceedings of the National Academy of Sciences},
  volume = {102},
  number = {18},
  pages = {6395--6400},
  publisher = {Proceedings of the National Academy of Sciences},
  doi = {10.1073/pnas.0408677102},
  url = {https://www.pnas.org/doi/full/10.1073/pnas.0408677102},
  urldate = {2022-05-12},
  annotation = {293 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/FXRKHVBC/Atchley et al. - 2005 - Solving the protein sequence metric problem.pdf}
}

@article{atchleySolvingProteinSequence2005a,
  title = {Solving the Protein Sequence Metric Problem},
  author = {Atchley, William R. and Zhao, Jieping and Fernandes, Andrew D. and Drüke, Tanja},
  date = {2005-05-03},
  journaltitle = {Proceedings of the National Academy of Sciences},
  volume = {102},
  number = {18},
  pages = {6395--6400},
  publisher = {Proceedings of the National Academy of Sciences},
  doi = {10.1073/pnas.0408677102},
  url = {https://www.pnas.org/doi/10.1073/pnas.0408677102},
  urldate = {2024-10-30},
  abstract = {Biological sequences are composed of long strings of alphabetic letters rather than arrays of numerical values. Lack of a natural underlying metric for comparing such alphabetic data significantly inhibits sophisticated statistical analyses of sequences, modeling structural and functional aspects of proteins, and related problems. Herein, we use multivariate statistical analyses on almost 500 amino acid attributes to produce a small set of highly interpretable numeric patterns of amino acid variability. These high-dimensional attribute data are summarized by five multidimensional patterns of attribute covariation that reflect polarity, secondary structure, molecular volume, codon diversity, and electrostatic charge. Numerical scores for each amino acid then transform amino acid sequences for statistical analyses. Relationships between transformed data and amino acid substitution matrices show significant associations for polarity and codon diversity scores. Transformed alphabetic data are used in analysis of variance and discriminant analysis to study DNA binding in the basic helix-loop-helix proteins. The transformed scores offer a general solution for analyzing a wide variety of sequence analysis problems.},
  file = {/Users/Nasy/Zotero/storage/UFNE47QG/Atchley et al. - 2005 - Solving the protein sequence metric problem.pdf}
}

@online{bachmannConstantCurvatureGraph2020,
  title = {Constant {{Curvature Graph Convolutional Networks}}},
  author = {Bachmann, Gregor and Bécigneul, Gary and Ganea, Octavian-Eugen},
  date = {2020-05-19},
  eprint = {1911.05076},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.1911.05076},
  url = {http://arxiv.org/abs/1911.05076},
  abstract = {Interest has been rising lately towards methods representing data in non-Euclidean spaces, e.g. hyperbolic or spherical, that provide specific inductive biases useful for certain real-world data properties, e.g. scale-free, hierarchical or cyclical. However, the popular graph neural networks are currently limited in modeling data only via Euclidean geometry and associated vector space operations. Here, we bridge this gap by proposing mathematically grounded generalizations of graph convolutional networks (GCN) to (products of) constant curvature spaces. We do this by i) introducing a unified formalism that can interpolate smoothly between all geometries of constant curvature, ii) leveraging gyro-barycentric coordinates that generalize the classic Euclidean concept of the center of mass. Our class of models smoothly recover their Euclidean counterparts when the curvature goes to zero from either side. Empirically, we outperform Euclidean GCNs in the tasks of node classification and distortion minimization for symbolic data exhibiting non-Euclidean behavior, according to their discrete curvature.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/ENAHPLJ4/Bachmann et al. - 2020 - Constant Curvature Graph Convolutional Networks.pdf;/Users/Nasy/Zotero/storage/7F3DBCJN/1911.html}
}

@article{baekAccuratePredictionProtein2021,
  title = {Accurate Prediction of Protein Structures and Interactions Using a Three-Track Neural Network},
  author = {Baek, Minkyung and DiMaio, Frank and Anishchenko, Ivan and Dauparas, Justas and Ovchinnikov, Sergey and Lee, Gyu Rie and Wang, Jue and Cong, Qian and Kinch, Lisa N. and Schaeffer, R. Dustin and Millán, Claudia and Park, Hahnbeom and Adams, Carson and Glassman, Caleb R. and DeGiovanni, Andy and Pereira, Jose H. and Rodrigues, Andria V. and van Dijk, Alberdina A. and Ebrecht, Ana C. and Opperman, Diederik J. and Sagmeister, Theo and Buhlheller, Christoph and Pavkov-Keller, Tea and Rathinaswamy, Manoj K. and Dalwadi, Udit and Yip, Calvin K. and Burke, John E. and Garcia, K. Christopher and Grishin, Nick V. and Adams, Paul D. and Read, Randy J. and Baker, David},
  options = {useprefix=true},
  date = {2021-08-20},
  journaltitle = {Science},
  volume = {373},
  number = {6557},
  pages = {871--876},
  issn = {0036-8075, 1095-9203},
  doi = {10.1126/science.abj8754},
  url = {https://www.science.org/doi/10.1126/science.abj8754},
  abstract = {Deep learning takes on protein folding                            In 1972, Anfinsen won a Nobel prize for demonstrating a connection between a protein’s amino acid sequence and its three-dimensional structure. Since 1994, scientists have competed in the biannual Critical Assessment of Structure Prediction (CASP) protein-folding challenge. Deep learning methods took center stage at CASP14, with DeepMind’s Alphafold2 achieving remarkable accuracy. Baek               et al               . explored network architectures based on the DeepMind framework. They used a three-track network to process sequence, distance, and coordinate information simultaneously and achieved accuracies approaching those of DeepMind. The method, RoseTTA fold, can solve challenging x-ray crystallography and cryo–electron microscopy modeling problems and generate accurate models of protein-protein complexes. —VV                        ,              Protein structure modeling enables the rapid solution of protein structures and provides insights into function.           ,              DeepMind presented notably accurate predictions at the recent 14th Critical Assessment of Structure Prediction (CASP14) conference. We explored network architectures that incorporate related ideas and obtained the best performance with a three-track network in which information at the one-dimensional (1D) sequence level, the 2D distance map level, and the 3D coordinate level is successively transformed and integrated. The three-track network produces structure predictions with accuracies approaching those of DeepMind in CASP14, enables the rapid solution of challenging x-ray crystallography and cryo–electron microscopy structure modeling problems, and provides insights into the functions of proteins of currently unknown structure. The network also enables rapid generation of accurate protein-protein complex models from sequence information alone, short-circuiting traditional approaches that require modeling of individual subunits followed by docking. We make the method available to the scientific community to speed biological research.},
  langid = {english},
  annotation = {815 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/4C5HGF4D/Baek et al. - 2021 - Accurate prediction of protein structures and inte.pdf;/Users/Nasy/Zotero/storage/N56PEIFE/abj8754_baek_sm.pdf}
}

@article{bagaevVDJdb2019Database2020,
  title = {{{VDJdb}} in 2019: Database Extension, New Analysis Infrastructure and a {{T-cell}} Receptor Motif Compendium},
  shorttitle = {{{VDJdb}} in 2019},
  author = {Bagaev, Dmitry V. and Vroomans, Renske M. A. and Samir, Jerome and Stervbo, Ulrik and Rius, Cristina and Dolton, Garry and Greenshields-Watson, Alexander and Attaf, Meriem and Egorov, Evgeny S. and Zvyagin, Ivan V. and Babel, Nina and Cole, David K. and Godkin, Andrew J. and Sewell, Andrew K. and Kesmir, Can and Chudakov, Dmitriy M. and Luciani, Fabio and Shugay, Mikhail},
  date = {2020-01-08},
  journaltitle = {Nucleic Acids Res},
  volume = {48},
  number = {D1},
  eprint = {31588507},
  eprinttype = {pmid},
  pages = {D1057-D1062},
  issn = {1362-4962},
  doi = {10.1093/nar/gkz874},
  abstract = {Here, we report an update of the VDJdb database with a substantial increase in the number of T-cell receptor (TCR) sequences and their cognate antigens. The update further provides a new database infrastructure featuring two additional analysis modes that facilitate database querying and real-world data analysis. The increased yield of TCR specificity identification methods and the overall increase in the number of studies in the field has allowed us to expand the database more than 5-fold. Furthermore, several new analysis methods are included. For example, batch annotation of TCR repertoire sequencing samples allows for annotating large datasets on-line. Using recently developed bioinformatic methods for TCR motif mining, we have built a reduced set of high-quality TCR motifs that can be used for both training TCR specificity predictors and matching against TCRs of interest. These additions enhance the versatility of the VDJdb in the task of exploring T-cell antigen specificities. The database is available at https://vdjdb.cdr3.net.},
  langid = {english},
  pmcid = {PMC6943061},
  keywords = {Amino Acid Sequence,Computational Biology,Databases Genetic,High-Throughput Nucleotide Sequencing,Humans,Nucleotide Motifs,Position-Specific Scoring Matrices,Receptors Antigen T-Cell,Sequence Analysis DNA,Software,V(D)J Recombination,Web Browser},
  annotation = {139 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/E8CLVMGU/Bagaev et al. - 2020 - VDJdb in 2019 database extension, new analysis in.pdf}
}

@inproceedings{balntasLearningLocalFeature2016,
  title = {Learning Local Feature Descriptors with Triplets and Shallow Convolutional Neural Networks},
  booktitle = {Procedings of the {{British Machine Vision Conference}} 2016},
  author = {Balntas, Vassileios and Riba, Edgar and Ponsa, Daniel and Mikolajczyk, Krystian},
  date = {2016},
  pages = {119.1-119.11},
  publisher = {British Machine Vision Association},
  location = {York, UK},
  doi = {10.5244/C.30.119},
  url = {http://www.bmva.org/bmvc/2016/papers/paper119/index.html},
  urldate = {2022-05-12},
  eventtitle = {British {{Machine Vision Conference}} 2016},
  isbn = {978-1-901725-59-9},
  langid = {english},
  keywords = {nosource},
  annotation = {122 citations (Crossref) [2022-08-03]}
}

@unpublished{baltusConvolutionalNeuralNetwork2022,
  title = {Convolutional Neural Network for Gravitational-Wave Early Alert: {{Going}} down in Frequency},
  shorttitle = {Convolutional Neural Network for Gravitational-Wave Early Alert},
  author = {Baltus, Grégory and Janquart, Justin and Lopez, Melissa and Narola, Harsh and Cudell, Jean-René},
  date = {2022-05-10},
  eprint = {2205.04750},
  eprinttype = {arXiv},
  eprintclass = {gr-qc},
  url = {http://arxiv.org/abs/2205.04750},
  urldate = {2022-05-11},
  abstract = {We present here the latest development of a machine-learning pipeline for pre-merger alerts from gravitational waves coming from binary neutron stars. This work starts from the convolutional neural networks introduced in our previous paper (PhysRevD.103.102003) that searched for three classes of early inspirals in simulated Gaussian noise colored with the design-sensitivity power-spectral density of LIGO. Our new network is able to search for any type of binary neutron stars, it can take into account all the detectors available, and it can see the events even earlier than the previous one. We study the performance of our method in three different types of noise: Gaussian O3 noise, real O3 noise, and predicted O4 noise. We show that our network performs almost as well in non-Gaussian noise as in Gaussian noise: our method is robust w.r.t. glitches and artifacts present in real noise. Although it would not have been able to trigger on the BNSs detected during O3 because their signal-to-noise ratio was too weak, we expect our network to find around 3 BNSs during O4 with a time before the merger between 3 and 88 s in advance.},
  keywords = {General Relativity and Quantum Cosmology},
  file = {/Users/Nasy/Zotero/storage/IWWT7H5B/Baltus et al. - 2022 - Convolutional neural network for gravitational-wav.pdf;/Users/Nasy/Zotero/storage/KBX3UDA4/2205.html}
}

@unpublished{bankAutoencoders2021,
  title = {Autoencoders},
  author = {Bank, Dor and Koenigstein, Noam and Giryes, Raja},
  date = {2021-04-03},
  eprint = {2003.05991},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/2003.05991},
  abstract = {An autoencoder is a specific type of a neural network, which is mainly designed to encode the input into a compressed and meaningful representation, and then decode it back such that the reconstructed input is similar as possible to the original one. This chapter surveys the different types of autoencoders that are mainly used today. It also describes various applications and use-cases of autoencoders.},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/INB7XMFW/Bank et al. - 2021 - Autoencoders.pdf;/Users/Nasy/Zotero/storage/DNE553YC/2003.html}
}

@article{bayleyRobustMachineLearning2020,
  title = {Robust Machine Learning Algorithm to Search for Continuous Gravitational Waves},
  author = {Bayley, Joe and Messenger, Chris and Woan, Graham},
  date = {2020-10-21},
  journaltitle = {Phys. Rev. D},
  volume = {102},
  number = {8},
  pages = {083024},
  publisher = {American Physical Society},
  doi = {10.1103/PhysRevD.102.083024},
  url = {https://link.aps.org/doi/10.1103/PhysRevD.102.083024},
  urldate = {2021-04-29},
  abstract = {Many continuous gravitational wave searches are affected by instrumental spectral lines that could be confused with a continuous astrophysical signal. Several techniques have been developed to limit the effect of these lines by penalizing signals that appear in only a single detector. We have developed a general method, using a convolutional neural network, to reduce the impact of instrumental artifacts on searches that use the SOAP algorithm Bayley et al. [Phys. Rev. D 100, 023006 (2019)]. The method can identify features in corresponding frequency bands of each detector and classify these bands as containing a signal, an instrumental line, or noise. We tested the method against four different datasets: Gaussian noise with time gaps, data from the final run of Initial LIGO (S6) with signals added, the reference S6 mock data challenge dataset Walsh et al. [Phys. Rev. D 94, 124010 (2016)] and signals injected into data from the second advanced LIGO observing run (O2). Using the S6 mock data challenge dataset and at a 1\% false alarm probability we showed that at 95\% efficiency a fully automated SOAP search has a sensitivity corresponding to a coherent signal-to-noise ratio of 110, equivalent to a sensitivity depth of 10 Hz−1/2, making this automated search competitive with other searches requiring significantly more computing resources and human intervention.},
  issue = {8},
  annotation = {8 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/FBGYXIBY/Bayley et al. - 2020 - Robust machine learning algorithm to search for co.pdf;/Users/Nasy/Zotero/storage/6RZYY7ZT/PhysRevD.102.html}
}

@book{benedettiLecturesHyperbolicGeometry1992,
  title = {Lectures on {{Hyperbolic Geometry}}},
  author = {Benedetti, Riccardo and Petronio, Carlo},
  date = {1992},
  series = {Universitext},
  publisher = {Springer},
  location = {Berlin, Heidelberg},
  doi = {10.1007/978-3-642-58158-8},
  url = {http://link.springer.com/10.1007/978-3-642-58158-8},
  urldate = {2023-03-10},
  isbn = {978-3-540-55534-6 978-3-642-58158-8},
  keywords = {Cohomology,Flat Fiber Bundles,Geometry of Manifolds,Hyperbolic Geometry,manifold},
  file = {/Users/Nasy/Zotero/storage/RCMS5X86/Benedetti and Petronio - 1992 - Lectures on Hyperbolic Geometry.pdf}
}

@online{biPanguWeather3DHighResolution2022,
  title = {Pangu-{{Weather}}: {{A 3D High-Resolution Model}} for {{Fast}} and {{Accurate Global Weather Forecast}}},
  shorttitle = {Pangu-{{Weather}}},
  author = {Bi, Kaifeng and Xie, Lingxi and Zhang, Hengheng and Chen, Xin and Gu, Xiaotao and Tian, Qi},
  date = {2022-11-03},
  eprint = {2211.02556},
  eprinttype = {arXiv},
  eprintclass = {physics},
  doi = {10.48550/arXiv.2211.02556},
  url = {http://arxiv.org/abs/2211.02556},
  urldate = {2022-11-11},
  abstract = {In this paper, we present Pangu-Weather, a deep learning based system for fast and accurate global weather forecast. For this purpose, we establish a data-driven environment by downloading \$43\$ years of hourly global weather data from the 5th generation of ECMWF reanalysis (ERA5) data and train a few deep neural networks with about \$256\$ million parameters in total. The spatial resolution of forecast is \$0.25\textasciicircum\textbackslash circ\textbackslash times0.25\textasciicircum\textbackslash circ\$, comparable to the ECMWF Integrated Forecast Systems (IFS). More importantly, for the first time, an AI-based method outperforms state-of-the-art numerical weather prediction (NWP) methods in terms of accuracy (latitude-weighted RMSE and ACC) of all factors (e.g., geopotential, specific humidity, wind speed, temperature, etc.) and in all time ranges (from one hour to one week). There are two key strategies to improve the prediction accuracy: (i) designing a 3D Earth Specific Transformer (3DEST) architecture that formulates the height (pressure level) information into cubic data, and (ii) applying a hierarchical temporal aggregation algorithm to alleviate cumulative forecast errors. In deterministic forecast, Pangu-Weather shows great advantages for short to medium-range forecast (i.e., forecast time ranges from one hour to one week). Pangu-Weather supports a wide range of downstream forecast scenarios, including extreme weather forecast (e.g., tropical cyclone tracking) and large-member ensemble forecast in real-time. Pangu-Weather not only ends the debate on whether AI-based methods can surpass conventional NWP methods, but also reveals novel directions for improving deep learning weather forecast systems.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Physics - Atmospheric and Oceanic Physics},
  file = {/Users/Nasy/Zotero/storage/TGACLEGP/Bi et al. - 2022 - Pangu-Weather A 3D High-Resolution Model for Fast.pdf;/Users/Nasy/Zotero/storage/NSSVVBNE/2211.html}
}

@online{blochUniversalModelHyperbolic0100,
  title = {A {{Universal Model}} for {{Hyperbolic}}, {{Euclidean}} and {{Spherical Geometries}}},
  author = {Bloch, Andreas},
  year = {20:00:00 +0100},
  url = {http://andbloch.github.io/K-Stereographic-Model/},
  urldate = {2023-03-24},
  abstract = {This blogpost presents a geometric model that harnesses the formalism gyrovector spaces in order to capture all three geometries of constant curvature at once. Furthermore, the presented model allows to smoothly interpolate between different curvatures in order to learn the curvature of spaces jointly with the embeddings.},
  organization = {Andreas Bloch}
}

@article{bonnabelStochasticGradientDescent2013,
  title = {Stochastic {{Gradient Descent}} on {{Riemannian Manifolds}}},
  author = {Bonnabel, Silvère},
  date = {2013-09},
  journaltitle = {IEEE Transactions on Automatic Control},
  volume = {58},
  number = {9},
  pages = {2217--2229},
  issn = {1558-2523},
  doi = {10.1109/TAC.2013.2254619},
  abstract = {Stochastic gradient descent is a simple approach to find the local minima of a cost function whose evaluations are corrupted by noise. In this paper, we develop a procedure extending stochastic gradient descent algorithms to the case where the function is defined on a Riemannian manifold. We prove that, as in the Euclidian case, the gradient descent algorithm converges to a critical point of the cost function. The algorithm has numerous potential applications, and is illustrated here by four examples. In particular a novel gossip algorithm on the set of covariance matrices is derived and tested numerically.},
  eventtitle = {{{IEEE Transactions}} on {{Automatic Control}}},
  keywords = {Approximation methods,Convergence,Cost function,Covariance matrices,Manifolds,Nonlinear identification,Riemannian geometry,Standards,stochastic approximation,Trajectory},
  annotation = {170 citations (Crossref) [2023-03-09]},
  file = {/Users/Nasy/Zotero/storage/VJ5CADFZ/Bonnabel - 2013 - Stochastic Gradient Descent on Riemannian Manifold.pdf;/Users/Nasy/Zotero/storage/G7Z8YY8Y/6487381.html}
}

@article{bradleyStructurebasedPredictionCell2023,
  title = {Structure-Based Prediction of {{T}} Cell Receptor:Peptide-{{MHC}} Interactions},
  shorttitle = {Structure-Based Prediction of {{T}} Cell Receptor},
  author = {Bradley, Philip},
  date = {2023-01-20},
  journaltitle = {eLife},
  volume = {12},
  eprint = {36661395},
  eprinttype = {pmid},
  pages = {e82813},
  issn = {2050-084X},
  doi = {10.7554/eLife.82813},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9859041/},
  abstract = {The regulatory and effector functions of T cells are initiated by the binding of their cell-surface T cell receptor (TCR) to peptides presented by major histocompatibility complex (MHC) proteins on other cells. The specificity of TCR:peptide-MHC interactions, thus, underlies nearly all adaptive immune responses. Despite intense interest, generalizable predictive models of TCR:peptide-MHC specificity remain out of reach; two key barriers are the diversity of TCR recognition modes and the paucity of training data. Inspired by recent breakthroughs in protein structure prediction achieved by deep neural networks, we evaluated structural modeling as a potential avenue for prediction of TCR epitope specificity. We show that a specialized version of the neural network predictor AlphaFold can generate models of TCR:peptide-MHC interactions that can be used to discriminate correct from incorrect peptide epitopes with substantial accuracy. Although much work remains to be done for these predictions to have widespread practical utility, we are optimistic that deep learning-based structural modeling represents a path to generalizable prediction of TCR:peptide-MHC interaction specificity.},
  pmcid = {PMC9859041},
  file = {/Users/Nasy/Zotero/storage/EB7SX9BA/Bradley - Structure-based prediction of T cell receptorpeptide-MHC interactions.pdf}
}

@article{brahmaWhyDeepLearning2016,
  title = {Why {{Deep Learning Works}}: {{A Manifold Disentanglement Perspective}}},
  shorttitle = {Why {{Deep Learning Works}}},
  author = {Brahma, Pratik Prabhanjan and Wu, Dapeng and She, Yiyuan},
  date = {2016-10},
  journaltitle = {IEEE Transactions on Neural Networks and Learning Systems},
  volume = {27},
  number = {10},
  pages = {1997--2008},
  issn = {2162-2388},
  doi = {10.1109/TNNLS.2015.2496947},
  abstract = {Deep hierarchical representations of the data have been found out to provide better informative features for several machine learning applications. In addition, multilayer neural networks surprisingly tend to achieve better performance when they are subject to an unsupervised pretraining. The booming of deep learning motivates researchers to identify the factors that contribute to its success. One possible reason identified is the flattening of manifold-shaped data in higher layers of neural networks. However, it is not clear how to measure the flattening of such manifold-shaped data and what amount of flattening a deep neural network can achieve. For the first time, this paper provides quantitative evidence to validate the flattening hypothesis. To achieve this, we propose a few quantities for measuring manifold entanglement under certain assumptions and conduct experiments with both synthetic and real-world data. Our experimental results validate the proposition and lead to new insights on deep learning.},
  eventtitle = {{{IEEE Transactions}} on {{Neural Networks}} and {{Learning Systems}}},
  keywords = {Data models,Deep learning,disentanglement,Kernel,Machine learning,manifold learning,Manifolds,Neural networks,Nonhomogeneous media,Principal component analysis,unsupervised feature transformation},
  file = {/Users/Nasy/Zotero/storage/EEUYBAUP/7348689.html}
}

@article{brandesProteinBERTUniversalDeeplearning2022,
  title = {{{ProteinBERT}}: A Universal Deep-Learning Model of Protein Sequence and Function},
  shorttitle = {{{ProteinBERT}}},
  author = {Brandes, Nadav and Ofer, Dan and Peleg, Yam and Rappoport, Nadav and Linial, Michal},
  date = {2022-04-12},
  journaltitle = {Bioinformatics},
  volume = {38},
  number = {8},
  pages = {2102--2110},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btac020},
  url = {https://doi.org/10.1093/bioinformatics/btac020},
  urldate = {2024-10-30},
  abstract = {Self-supervised deep language modeling has shown unprecedented success across natural language tasks, and has recently been repurposed to biological sequences. However, existing models and pretraining methods are designed and optimized for text analysis. We introduce ProteinBERT, a deep language model specifically designed for proteins. Our pretraining scheme combines language modeling with a novel task of Gene Ontology (GO) annotation prediction. We introduce novel architectural elements that make the model highly efficient and flexible to long sequences. The architecture of ProteinBERT consists of both local and global representations, allowing end-to-end processing of these types of inputs and outputs. ProteinBERT obtains near state-of-the-art performance, and sometimes exceeds it, on multiple benchmarks covering diverse protein properties (including protein structure, post-translational modifications and biophysical attributes), despite using a far smaller and faster model than competing deep-learning methods. Overall, ProteinBERT provides an efficient framework for rapidly training protein predictors, even with limited labeled data.Code and pretrained model weights are available at https://github.com/nadavbra/protein\_bert.Supplementary data are available at Bioinformatics online.},
  file = {/Users/Nasy/Zotero/storage/RDGJZUPR/Brandes et al. - 2022 - ProteinBERT a universal deep-learning model of protein sequence and function.pdf;/Users/Nasy/Zotero/storage/6Q8TN74K/6502274.html}
}

@inproceedings{brandstetterMessagePassingNeural2022,
  title = {Message {{Passing Neural PDE Solvers}}},
  author = {Brandstetter, Johannes and Worrall, Daniel E. and Welling, Max},
  date = {2022-05-08},
  url = {https://openreview.net/forum?id=vSix3HPYKSU},
  abstract = {The numerical solution of partial differential equations (PDEs) is difficult, having led to a century of research so far. Recently, there have been pushes to build neural--numerical hybrid solvers, which piggy-backs the modern trend towards fully end-to-end learned systems. Most works so far can only generalize over a subset of properties to which a generic solver would be faced, including: resolution, topology, geometry, boundary conditions, domain discretization regularity, dimensionality, etc. In this work, we build a solver, satisfying these properties, where all the components are based on neural message passing, replacing all heuristically designed components in the computation graph with backprop-optimized neural function approximators. We show that neural message passing solvers representationally contain some classical methods, such as finite differences, finite volumes, and WENO schemes. In order to encourage stability in training autoregressive models, we put forward a method that is based on the principle of zero-stability, posing stability as a domain adaptation problem. We validate our method on various fluid-like flow problems, demonstrating fast, stable, and accurate performance across different domain topologies, discretization, etc. in 1D and 2D. Our model outperforms state-of-the-art numerical solvers in the low resolution regime in terms of speed, and accuracy.},
  eventtitle = {International {{Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/2JQ5CA7C/Brandstetter et al. - 2022 - Message Passing Neural PDE Solvers.pdf;/Users/Nasy/Zotero/storage/VI987Y2L/forum.html}
}

@article{camachoBLASTArchitectureApplications2009,
  title = {{{BLAST}}+: Architecture and Applications},
  shorttitle = {{{BLAST}}+},
  author = {Camacho, Christiam and Coulouris, George and Avagyan, Vahram and Ma, Ning and Papadopoulos, Jason and Bealer, Kevin and Madden, Thomas L.},
  date = {2009-12-15},
  journaltitle = {BMC Bioinformatics},
  volume = {10},
  number = {1},
  pages = {421},
  issn = {1471-2105},
  doi = {10.1186/1471-2105-10-421},
  url = {https://doi.org/10.1186/1471-2105-10-421},
  abstract = {Sequence similarity searching is a very important bioinformatics task. While Basic Local Alignment Search Tool (BLAST) outperforms exact methods through its use of heuristics, the speed of the current BLAST software is suboptimal for very long queries or database sequences. There are also some shortcomings in the user-interface of the current command-line applications.},
  keywords = {Abstract Data Type,Basic Local Alignment Search Tool,Basic Local Alignment Search Tool Search,Lookup Table,Short Read Archive},
  annotation = {10045 citations (Crossref) [2022-09-14]},
  file = {/Users/Nasy/Zotero/storage/5WNA8BC7/Camacho et al. - 2009 - BLAST+ architecture and applications.pdf;/Users/Nasy/Zotero/storage/TPRBZ2EL/1471-2105-10-421.html}
}

@online{CBrainDeepLearning,
  title = {C-{{Brain}}: {{A}} Deep Learning Accelerator That Tames the Diversity of {{CNNs}} through Adaptive Data-Level Parallelization | {{IEEE Conference Publication}} | {{IEEE Xplore}}},
  url = {https://ieeexplore.ieee.org/document/7544365},
  urldate = {2022-03-29},
  file = {/Users/Nasy/Zotero/storage/H27EK3XF/7544365.html}
}

@inproceedings{cenRepresentationLearningAttributed2019,
  title = {Representation {{Learning}} for {{Attributed Multiplex Heterogeneous Network}}},
  booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Cen, Yukuo and Zou, Xu and Zhang, Jianwei and Yang, Hongxia and Zhou, Jingren and Tang, Jie},
  date = {2019-07-25},
  eprint = {1905.01669},
  eprinttype = {arXiv},
  eprintclass = {cs},
  pages = {1358--1368},
  doi = {10.1145/3292500.3330964},
  url = {http://arxiv.org/abs/1905.01669},
  urldate = {2022-05-30},
  abstract = {Network embedding (or graph embedding) has been widely used in many real-world applications. However, existing methods mainly focus on networks with single-typed nodes/edges and cannot scale well to handle large networks. Many real-world networks consist of billions of nodes and edges of multiple types, and each node is associated with different attributes. In this paper, we formalize the problem of embedding learning for the Attributed Multiplex Heterogeneous Network and propose a unified framework to address this problem. The framework supports both transductive and inductive learning. We also give the theoretical analysis of the proposed framework, showing its connection with previous works and proving its better expressiveness. We conduct systematical evaluations for the proposed framework on four different genres of challenging datasets: Amazon, YouTube, Twitter, and Alibaba. Experimental results demonstrate that with the learned embeddings from the proposed framework, we can achieve statistically significant improvements (e.g., 5.99-28.23\% lift by F1 scores; p{$<<$}0.01, t-test) over previous state-of-the-art methods for link prediction. The framework has also been successfully deployed on the recommendation system of a worldwide leading e-commerce company, Alibaba Group. Results of the offline A/B tests on product recommendation further confirm the effectiveness and efficiency of the framework in practice.},
  keywords = {Computer Science - Machine Learning,Computer Science - Social and Information Networks},
  annotation = {130 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/WTF9DY8Y/Cen et al. - 2019 - Representation Learning for Attributed Multiplex H.pdf;/Users/Nasy/Zotero/storage/4QKNK4D7/1905.html}
}

@inproceedings{cenRepresentationLearningAttributed2019a,
  title = {Representation {{Learning}} for {{Attributed Multiplex Heterogeneous Network}}},
  booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Cen, Yukuo and Zou, Xu and Zhang, Jianwei and Yang, Hongxia and Zhou, Jingren and Tang, Jie},
  date = {2019-07-25},
  series = {{{KDD}} '19},
  pages = {1358--1368},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3292500.3330964},
  url = {https://doi.org/10.1145/3292500.3330964},
  urldate = {2022-06-05},
  abstract = {Network embedding (or graph embedding) has been widely used in many real-world applications. However, existing methods mainly focus on networks with single-typed nodes/edges and cannot scale well to handle large networks. Many real-world networks consist of billions of nodes and edges of multiple types, and each node is associated with different attributes. In this paper, we formalize the problem of embedding learning for the Attributed Multiplex Heterogeneous Network and propose a unified framework to address this problem. The framework supports both transductive and inductive learning. We also give the theoretical analysis of the proposed framework, showing its connection with previous works and proving its better expressiveness. We conduct systematical evaluations for the proposed framework on four different genres of challenging datasets: Amazon, YouTube, Twitter, and Alibaba. Experimental results demonstrate that with the learned embeddings from the proposed framework, we can achieve statistically significant improvements (e.g., 5.99-28.23\% lift by F1 scores; p{$<<$}0.01, t-test) over previous state-of-the-art methods for link prediction. The framework has also been successfully deployed on the recommendation system of a worldwide leading e-commerce company, Alibaba Group. Results of the offline A/B tests on product recommendation further confirm the effectiveness and efficiency of the framework in practice.},
  isbn = {978-1-4503-6201-6},
  keywords = {heterogeneous network,multiplex network,network embedding},
  annotation = {130 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/9EU6VBKP/Cen et al. - 2019 - Representation Learning for Attributed Multiplex H.pdf}
}

@online{chamberlainNeuralEmbeddingsGraphs2017,
  title = {Neural {{Embeddings}} of {{Graphs}} in {{Hyperbolic Space}}},
  author = {Chamberlain, Benjamin Paul and Clough, James and Deisenroth, Marc Peter},
  date = {2017-05-29},
  eprint = {1705.10359},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1705.10359},
  url = {http://arxiv.org/abs/1705.10359},
  urldate = {2023-02-02},
  abstract = {Neural embeddings have been used with great success in Natural Language Processing (NLP). They provide compact representations that encapsulate word similarity and attain state-of-the-art performance in a range of linguistic tasks. The success of neural embeddings has prompted significant amounts of research into applications in domains other than language. One such domain is graph-structured data, where embeddings of vertices can be learned that encapsulate vertex similarity and improve performance on tasks including edge prediction and vertex labelling. For both NLP and graph based tasks, embeddings have been learned in high-dimensional Euclidean spaces. However, recent work has shown that the appropriate isometric space for embedding complex networks is not the flat Euclidean space, but negatively curved, hyperbolic space. We present a new concept that exploits these recent insights and propose learning neural embeddings of graphs in hyperbolic space. We provide experimental evidence that embedding graphs in their natural geometry significantly improves performance on downstream tasks for several real-world public datasets.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/DDSF2KU5/Chamberlain et al. - 2017 - Neural Embeddings of Graphs in Hyperbolic Space.pdf;/Users/Nasy/Zotero/storage/K2PHFPEW/1705.html}
}

@online{chamiHyperbolicGraphConvolutional2019,
  title = {Hyperbolic {{Graph Convolutional Neural Networks}}},
  author = {Chami, Ines and Ying, Rex and Ré, Christopher and Leskovec, Jure},
  date = {2019-10-28},
  eprint = {1910.12933},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1910.12933},
  url = {http://arxiv.org/abs/1910.12933},
  urldate = {2023-02-02},
  abstract = {Graph convolutional neural networks (GCNs) embed nodes in a graph into Euclidean space, which has been shown to incur a large distortion when embedding real-world graphs with scale-free or hierarchical structure. Hyperbolic geometry offers an exciting alternative, as it enables embeddings with much smaller distortion. However, extending GCNs to hyperbolic geometry presents several unique challenges because it is not clear how to define neural network operations, such as feature transformation and aggregation, in hyperbolic space. Furthermore, since input features are often Euclidean, it is unclear how to transform the features into hyperbolic embeddings with the right amount of curvature. Here we propose Hyperbolic Graph Convolutional Neural Network (HGCN), the first inductive hyperbolic GCN that leverages both the expressiveness of GCNs and hyperbolic geometry to learn inductive node representations for hierarchical and scale-free graphs. We derive GCN operations in the hyperboloid model of hyperbolic space and map Euclidean input features to embeddings in hyperbolic spaces with different trainable curvature at each layer. Experiments demonstrate that HGCN learns embeddings that preserve hierarchical structure, and leads to improved performance when compared to Euclidean analogs, even with very low dimensional embeddings: compared to state-of-the-art GCNs, HGCN achieves an error reduction of up to 63.1\% in ROC AUC for link prediction and of up to 47.5\% in F1 score for node classification, also improving state-of-the art on the Pubmed dataset.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/YSREMQL9/Chami et al. - 2019 - Hyperbolic Graph Convolutional Neural Networks.pdf;/Users/Nasy/Zotero/storage/PIMRR6YC/1910.html}
}

@article{chenAeSpTVAdaptiveEfficient2020,
  title = {{{aeSpTV}}: {{An Adaptive}} and {{Efficient Framework}} for {{Sparse Tensor-Vector Product Kernel}} on a {{High-Performance Computing Platform}}},
  shorttitle = {{{aeSpTV}}},
  author = {Chen, Yuedan and Xiao, Guoqing and Özsu, M. Tamer and Liu, Chubo and Zomaya, Albert Y. and Li, Tao},
  date = {2020-10},
  journaltitle = {IEEE Transactions on Parallel and Distributed Systems},
  volume = {31},
  number = {10},
  pages = {2329--2345},
  issn = {1558-2183},
  doi = {10.1109/TPDS.2020.2990429},
  abstract = {Multi-dimensional, large-scale, and sparse data, which can be neatly represented by sparse tensors, are increasingly used in various applications such as data analysis and machine learning. A high-performance sparse tensor-vector product (SpTV), one of the most fundamental operations of processing sparse tensors, is necessary for improving efficiency of related applications. In this article, we propose aeSpTV, an adaptive and efficient SpTV framework on Sunway TaihuLight supercomputer, to solve several challenges of optimizing SpTVon high-performance computing platforms. First, to map SpTV to Sunway architecture and tame expensive memory access latency and parallel writing conflict due to the intrinsic irregularity of SpTV, we introduce an adaptive SpTV parallelization. Second, to co-execute with the parallelization design while still ensuring high efficiency, we design a sparse tensor data structure named CSSoCR. Third, based on the adaptive SpTV parallelization with the novel tensor data structure, we present an autotuner that chooses the most befitting tensor partitioning method for aeSpTV using the variance analysis theory of mathematical statistics to achieve load balance. Fourth, to further leverage the computing power of Sunway, we propose customized optimizations for aeSpTV. Experimental results show that aeSpTV yields good sacalability on both thread-level and process-level parallelism of Sunway. It achieves a maximum GFLOPS of 195.69 on 128 processes. Additionally, it is proved that optimization effects of the partitioning autotuner and optimization techniques are remarkable.},
  eventtitle = {{{IEEE Transactions}} on {{Parallel}} and {{Distributed Systems}}},
  keywords = {Data structures,Kernel,Optimization,Parallel,Parallel processing,partition,Sparse matrices,sparse tensor data structure,sparse tensor-vector product,Sunway architecture,Tensors},
  annotation = {8 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/I4YA3G4J/9078890.html}
}

@article{chenBilayeredParallelTraining2019,
  title = {A {{Bi-layered Parallel Training Architecture}} for {{Large-Scale Convolutional Neural Networks}}},
  author = {Chen, Jianguo and Li, Kenli and Bilal, Kashif and {zhou}, xu and Li, Keqin and Yu, Philip S.},
  date = {2019-05},
  journaltitle = {IEEE Transactions on Parallel and Distributed Systems},
  volume = {30},
  number = {5},
  pages = {965--976},
  issn = {1558-2183},
  doi = {10.1109/TPDS.2018.2877359},
  abstract = {Benefitting from large-scale training datasets and the complex training network, Convolutional Neural Networks (CNNs) are widely applied in various fields with high accuracy. However, the training process of CNNs is very time-consuming, where large amounts of training samples and iterative operations are required to obtain high-quality weight parameters. In this paper, we focus on the time-consuming training process of large-scale CNNs and propose a Bi-layered Parallel Training (BPT-CNN) architecture in distributed computing environments. BPT-CNN consists of two main components: (a) an outer-layer parallel training for multiple CNN subnetworks on separate data subsets, and (b) an inner-layer parallel training for each subnetwork. In the outer-layer parallelism, we address critical issues of distributed and parallel computing, including data communication, synchronization, and workload balance. A heterogeneous-aware Incremental Data Partitioning and Allocation (IDPA) strategy is proposed, where large-scale training datasets are partitioned and allocated to the computing nodes in batches according to their computing power. To minimize the synchronization waiting during the global weight update process, an Asynchronous Global Weight Update (AGWU) strategy is proposed. In the inner-layer parallelism, we further accelerate the training process for each CNN subnetwork on each computer, where computation steps of convolutional layer and the local weight training are parallelized based on task-parallelism. We introduce task decomposition and scheduling strategies with the objectives of thread-level load balancing and minimum waiting time for critical paths. Extensive experimental results indicate that the proposed BPT-CNN effectively improves the training performance of CNNs while maintaining the accuracy.},
  eventtitle = {{{IEEE Transactions}} on {{Parallel}} and {{Distributed Systems}}},
  keywords = {Acceleration,bi-layered parallel computing,Big data,Computational modeling,Computer architecture,convolutional neural networks,deep learning,distributed computing,Distributed computing,Parallel processing,Task analysis,Training},
  annotation = {88 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/YCNXLFJM/Chen et al. - 2019 - A Bi-layered Parallel Training Architecture for La.pdf}
}

@article{chenDeepRetrosyntheticReaction2021,
  title = {Deep {{Retrosynthetic Reaction Prediction}} Using {{Local Reactivity}} and {{Global Attention}}},
  author = {Chen, Shuan and Jung, Yousung},
  date = {2021-10-25},
  journaltitle = {JACS Au},
  volume = {1},
  number = {10},
  pages = {1612--1620},
  doi = {10.1021/jacsau.1c00246},
  url = {https://doi.org/10.1021/jacsau.1c00246},
  urldate = {2024-07-11},
  abstract = {As a fundamental problem in chemistry, retrosynthesis aims at designing reaction pathways and intermediates for a target compound. The goal of artificial intelligence (AI)-aided retrosynthesis is to automate this process by learning from the previous chemical reactions to make new predictions. Although several models have demonstrated their potentials for automated retrosynthesis, there is still a significant need to further enhance the prediction accuracy to a more practical level. Here we propose a local retrosynthesis framework called LocalRetro, motivated by the chemical intuition that the molecular changes occur mostly locally during the chemical reactions. This differs from nearly all existing retrosynthesis methods that suggest reactants based on the global structures of the molecules, often containing fine details not directly relevant to the reactions. This local concept yields local reaction templates involving the atom and bond edits. Because the remote functional groups can also affect the overall reaction path as a secondary aspect, the proposed locally encoded retrosynthesis model is then further refined to account for the nonlocal effects of chemical reaction through a global attention mechanism. Our model shows a promising 89.5 and 99.2\% round-trip accuracy at top-1 and top-5 predictions for the USPTO-50K dataset containing 50\,016 reactions. We further demonstrate the validity of LocalRetro on a large dataset containing 479\,035 reactions (UTPTO-MIT) with comparable round-trip top-1 and top-5 accuracy of 87.0 and 97.4\%, respectively. The practical application of the model is also demonstrated by correctly predicting the synthesis pathways of five drug candidate molecules from various literature.},
  file = {/Users/Nasy/Zotero/storage/3T4SQFYJ/Chen and Jung - 2021 - Deep Retrosynthetic Reaction Prediction using Local Reactivity and Global Attention.pdf}
}

@online{chenFullyHyperbolicNeural2022,
  title = {Fully {{Hyperbolic Neural Networks}}},
  author = {Chen, Weize and Han, Xu and Lin, Yankai and Zhao, Hexu and Liu, Zhiyuan and Li, Peng and Sun, Maosong and Zhou, Jie},
  date = {2022-03-15},
  eprint = {2105.14686},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2105.14686},
  url = {http://arxiv.org/abs/2105.14686},
  urldate = {2023-03-08},
  abstract = {Hyperbolic neural networks have shown great potential for modeling complex data. However, existing hyperbolic networks are not completely hyperbolic, as they encode features in a hyperbolic space yet formalize most of their operations in the tangent space (a Euclidean subspace) at the origin of the hyperbolic space. This hybrid method greatly limits the modeling ability of networks. In this paper, we propose a fully hyperbolic framework to build hyperbolic networks based on the Lorentz model by adapting the Lorentz transformations (including boost and rotation) to formalize essential operations of neural networks. Moreover, we also prove that linear transformation in tangent spaces used by existing hyperbolic networks is a relaxation of the Lorentz rotation and does not include the boost, implicitly limiting the capabilities of existing hyperbolic networks. The experimental results on four NLP tasks show that our method has better performance for building both shallow and deep networks. Our code will be released to facilitate follow-up research.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computation and Language,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/M95YSWX9/Chen et al. - 2022 - Fully Hyperbolic Neural Networks.pdf;/Users/Nasy/Zotero/storage/MEU5FT8W/2105.html}
}

@inproceedings{chenHpSpMVHeterogeneousParallel2019,
  title = {{{hpSpMV}}: {{A Heterogeneous Parallel Computing Scheme}} for {{SpMV}} on the {{Sunway TaihuLight Supercomputer}}},
  shorttitle = {{{hpSpMV}}},
  booktitle = {2019 {{IEEE}} 21st {{International Conference}} on {{High Performance Computing}} and {{Communications}}; {{IEEE}} 17th {{International Conference}} on {{Smart City}}; {{IEEE}} 5th {{International Conference}} on {{Data Science}} and {{Systems}} ({{HPCC}}/{{SmartCity}}/{{DSS}})},
  author = {Chen, Yuedan and Xiao, Guoqing and Xiao, Zheng and Yang, Wangdong},
  date = {2019-08},
  pages = {989--995},
  doi = {10.1109/HPCC/SmartCity/DSS.2019.00142},
  abstract = {Sparse matrix-vector multiplication (SpMV) is one of the most essential algorithms in various applications. This paper designs hpSpMV, a heterogeneous parallel computing scheme for SpMV, on the Sunway TaihuLight. There are three main contributions of the hpSpMV. (1) We propose a heterogeneous parallelization design for the SpMV based on the heterogeneous manycore architecture of the SW26010 of Sunway TaihuLight and the given sparse matrix formats. (2) We analyze the execution time of the proposed heterogeneous parallel SpMV on the Sunway. (3) We propose an auto-tuning framework to set the proper parameter of the heterogeneous parallel SpMV based on the execution time analysis on the Sunway. We test the hpSpMV's performance on the Sunway TaihuLight, the result analysis indicates that the hpSpMV has obvious performance improvement and good scalability on the Sunway TaihuLight.},
  eventtitle = {2019 {{IEEE}} 21st {{International Conference}} on {{High Performance Computing}} and {{Communications}}; {{IEEE}} 17th {{International Conference}} on {{Smart City}}; {{IEEE}} 5th {{International Conference}} on {{Data Science}} and {{Systems}} ({{HPCC}}/{{SmartCity}}/{{DSS}})},
  keywords = {Computer architecture,Conferences,Heterogeneous sparse matrix format sparse matrix-vector multiplication parallel Sunway TaihuLight,Kernel,Parallel processing,Program processors,Sparse matrices,Supercomputers},
  annotation = {3 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/SWMCNACB/8855593.html}
}

@inproceedings{chenImprovingInContextFewShot2022,
  title = {Improving {{In-Context Few-Shot Learning}} via {{Self-Supervised Training}}},
  booktitle = {Proceedings of the 2022 {{Conference}} of the {{North American Chapter}} of the {{Association}} for {{Computational Linguistics}}: {{Human Language Technologies}}},
  author = {Chen, Mingda and Du, Jingfei and Pasunuru, Ramakanth and Mihaylov, Todor and Iyer, Srini and Stoyanov, Veselin and Kozareva, Zornitsa},
  date = {2022-07},
  pages = {3558--3573},
  publisher = {Association for Computational Linguistics},
  location = {Seattle, United States},
  doi = {10.18653/v1/2022.naacl-main.260},
  url = {https://aclanthology.org/2022.naacl-main.260},
  abstract = {Self-supervised pretraining has made few-shot learning possible for many NLP tasks. But the pretraining objectives are not typically adapted specifically for in-context few-shot learning. In this paper, we propose to use self-supervision in an intermediate training stage between pretraining and downstream few-shot usage with the goal to teach the model to perform in-context few shot learning. We propose and evaluate four self-supervised objectives on two benchmarks. We find that the intermediate self-supervision stage produces models that outperform strong baselines. Ablation study shows that several factors affect the downstream performance, such as the amount of training data and the diversity of the self-supervised objectives. Human-annotated cross-task supervision and self-supervision are complementary. Qualitative analysis suggests that the self-supervised-trained models are better at following task requirements.},
  eventtitle = {{{NAACL-HLT}} 2022},
  annotation = {0 citations (Crossref) [2023-04-09]},
  file = {/Users/Nasy/Zotero/storage/4HDLFHX2/Chen et al. - 2022 - Improving In-Context Few-Shot Learning via Self-Su.pdf}
}

@inproceedings{chenPMEProjectedMetric2018,
  title = {{{PME}}: {{Projected Metric Embedding}} on {{Heterogeneous Networks}} for {{Link Prediction}}},
  shorttitle = {{{PME}}},
  booktitle = {Proceedings of the 24th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Chen, Hongxu and Yin, Hongzhi and Wang, Weiqing and Wang, Hao and Nguyen, Quoc Viet Hung and Li, Xue},
  date = {2018-07-19},
  series = {{{KDD}} '18},
  pages = {1177--1186},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3219819.3219986},
  url = {https://doi.org/10.1145/3219819.3219986},
  urldate = {2022-06-05},
  abstract = {Heterogenous information network embedding aims to embed heterogenous information networks (HINs) into low dimensional spaces, in which each vertex is represented as a low-dimensional vector, and both global and local network structures in the original space are preserved. However, most of existing heterogenous information network embedding models adopt the dot product to measure the proximity in the low dimensional space, and thus they can only preserve the first-order proximity and are insufficient to capture the global structure. Compared with homogenous information networks, there are multiple types of links (i.e., multiple relations) in HINs, and the link distribution w.r.t relations is highly skewed. To address the above challenging issues, we propose a novel heterogenous information network embedding model PME based on the metric learning to capture both first-order and second-order proximities in a unified way. To alleviate the potential geometrical inflexibility of existing metric learning approaches, we propose to build object and relation embeddings in separate object space and relation spaces rather than in a common space. Afterwards, we learn embeddings by firstly projecting vertices from object space to corresponding relation space and then calculate the proximity between projected vertices. To overcome the heavy skewness of the link distribution w.r.t relations and avoid "over-sampling'' or "under-sampling'' for each relation, we propose a novel loss-aware adaptive sampling approach for the model optimization. Extensive experiments have been conducted on a large-scale HIN dataset, and the experimental results show superiority of our proposed PME model in terms of prediction accuracy and scalability.},
  isbn = {978-1-4503-5552-0},
  keywords = {heterogenous network embedding,link prediction},
  annotation = {87 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/Y2E2GLGV/Chen et al. - 2018 - PME Projected Metric Embedding on Heterogeneous N.pdf}
}

@inproceedings{chenSemisupervisedUserProfiling2019,
  title = {Semi-Supervised {{User Profiling}} with {{Heterogeneous Graph Attention Networks}}},
  booktitle = {Proceedings of the {{Twenty-Eighth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Chen, Weijian and Gu, Yulong and Ren, Zhaochun and He, Xiangnan and Xie, Hongtao and Guo, Tong and Yin, Dawei and Zhang, Yongdong},
  date = {2019-08},
  pages = {2116--2122},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Macao, China},
  doi = {10.24963/ijcai.2019/293},
  url = {https://www.ijcai.org/proceedings/2019/293},
  urldate = {2022-05-26},
  abstract = {Aiming to represent user characteristics and personal interests, the task of user profiling is playing an increasingly important role for many real-world applications, e.g., e-commerce and social networks platforms. By exploiting the data like texts and user behaviors, most existing solutions address user profiling as a classification task, where each user is formulated as an individual data instance. Nevertheless, a user’s profile is not only reflected from her/his affiliated data, but also can be inferred from other users, e.g., the users that have similar copurchase behaviors in e-commerce, the friends in social networks, etc. In this paper, we approach user profiling in a semi-supervised manner, developing a generic solution based on heterogeneous graph learning. On the graph, nodes represent the entities of interest (e.g., users, items, attributes of items, etc.), and edges represent the interactions between entities. Our heterogeneous graph attention networks (HGAT) method learns the representation for each entity by accounting for the graph structure, and exploits the attention mechanism to discriminate the importance of each neighbor entity. Through such a learning scheme, HGAT can leverage both unsupervised information and limited labels of users to build the predictor. Extensive experiments on a real-world e-commerce dataset verify the effectiveness and rationality of our HGAT for user profiling.},
  eventtitle = {Twenty-{{Eighth International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-19}}\vphantom\{\}},
  isbn = {978-0-9992411-4-1},
  langid = {english},
  annotation = {25 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/T2ZACIS8/Chen et al. - 2019 - Semi-supervised User Profiling with Heterogeneous .pdf}
}

@article{chenSequenceStructuralAnalyses2017,
  title = {Sequence and {{Structural Analyses Reveal Distinct}} and {{Highly Diverse Human CD8}}+ {{TCR Repertoires}} to {{Immunodominant Viral Antigens}}},
  author = {Chen, Guobing and Yang, Xinbo and Ko, Annette and Sun, Xiaoping and Gao, Mingming and Zhang, Yongqing and Shi, Alvin and Mariuzza, Roy A. and Weng, Nan-Ping},
  date = {2017-04-18},
  journaltitle = {Cell Rep},
  volume = {19},
  number = {3},
  eprint = {28423320},
  eprinttype = {pmid},
  pages = {569--583},
  issn = {2211-1247},
  doi = {10.1016/j.celrep.2017.03.072},
  abstract = {A diverse T~cell receptor (TCR) repertoire is essential for controlling viral infections. However, information about TCR repertoires to defined viral antigens is limited. We performed a comprehensive analysis of~CD8+ TCR repertoires for two dominant viral epitopes: pp65495-503 (NLV) of cytomegalovirus and M158-66 (GIL) of influenza A virus. The highly individualized repertoires (87-5,533 α or β clonotypes per subject) comprised thousands of unique TCRα and TCRβ sequences and dozens of distinct complementary determining region (CDR)3α and CDR3β motifs. However, diversity is effectively restricted by preferential V-J combinations, CDR3 lengths, and CDR3α/CDR3β pairings. Structures of two GIL-specific TCRs bound to GIL-HLA-A2 provided a potential explanation for the lower diversity of GIL-specific versus NLV-specific repertoires. These anti-viral TCRs occupied up to 3.4\% of the CD8+ TCRβ repertoire, ensuring broad T~cell responses to single epitopes. Our portrait of two anti-viral TCR repertoires may inform the development of predictors of immune protection.},
  langid = {english},
  pmcid = {PMC5472051},
  keywords = {Adult,Amino Acid Motifs,Amino Acid Sequence,Antibody Affinity,Antigens Viral,CD8 T cells,CD8-Positive T-Lymphocytes,Clone Cells,Complementarity Determining Regions,Consensus Sequence,Cytomegalovirus,HLA-A2 Antigen,human,Humans,Immunodominant Epitopes,Influenza A virus,Peptides,Protein Binding,Receptors Antigen T-Cell,Species Specificity,TCR repertoire,TCR-pMHC structure,αβ TCRs for CMV-NLV,αβ TCRs for IAV-GIL},
  annotation = {76 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/HJ4VAYPH/Chen et al. - 2017 - Sequence and Structural Analyses Reveal Distinct a.pdf}
}

@inproceedings{chenSimpleFrameworkContrastive2020,
  title = {A {{Simple Framework}} for {{Contrastive Learning}} of {{Visual Representations}}},
  booktitle = {Proceedings of the 37th {{International Conference}} on {{Machine Learning}}},
  author = {Chen, Ting and Kornblith, Simon and Norouzi, Mohammad and Hinton, Geoffrey},
  date = {2020-11-21},
  pages = {1597--1607},
  publisher = {PMLR},
  issn = {2640-3498},
  url = {https://proceedings.mlr.press/v119/chen20j.html},
  urldate = {2022-03-17},
  abstract = {This paper presents SimCLR: a simple framework for contrastive learning of visual representations. We simplify recently proposed contrastive self-supervised learning algorithms without requiring specialized architectures or a memory bank. In order to understand what enables the contrastive prediction tasks to learn useful representations, we systematically study the major components of our framework. We show that (1) composition of data augmentations plays a critical role in defining effective predictive tasks, (2) introducing a learnable nonlinear transformation between the representation and the contrastive loss substantially improves the quality of the learned representations, and (3) contrastive learning benefits from larger batch sizes and more training steps compared to supervised learning. By combining these findings, we are able to considerably outperform previous methods for self-supervised and semi-supervised learning on ImageNet. A linear classifier trained on self-supervised representations learned by SimCLR achieves 76.5\% top-1 accuracy, which is a 7\% relative improvement over previous state-of-the-art, matching the performance of a supervised ResNet-50. When fine-tuned on only 1\% of the labels, we achieve 85.8\% top-5 accuracy, outperforming AlexNet with 100X fewer labels.},
  eventtitle = {International {{Conference}} on {{Machine Learning}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/DVKM3MFQ/Chen et al. - 2020 - A Simple Framework for Contrastive Learning of Vis.pdf;/Users/Nasy/Zotero/storage/M8V8MP6Q/Chen et al. - 2020 - A Simple Framework for Contrastive Learning of Vis.pdf}
}

@online{chenSymbolicDiscoveryOptimization2023,
  title = {Symbolic {{Discovery}} of {{Optimization Algorithms}}},
  author = {Chen, Xiangning and Liang, Chen and Huang, Da and Real, Esteban and Wang, Kaiyuan and Liu, Yao and Pham, Hieu and Dong, Xuanyi and Luong, Thang and Hsieh, Cho-Jui and Lu, Yifeng and Le, Quoc V.},
  date = {2023-05-08},
  eprint = {2302.06675},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.2302.06675},
  url = {http://arxiv.org/abs/2302.06675},
  urldate = {2024-11-08},
  abstract = {We present a method to formulate algorithm discovery as program search, and apply it to discover optimization algorithms for deep neural network training. We leverage efficient search techniques to explore an infinite and sparse program space. To bridge the large generalization gap between proxy and target tasks, we also introduce program selection and simplification strategies. Our method discovers a simple and effective optimization algorithm, \$\textbackslash textbf\{Lion\}\$ (\$\textbackslash textit\{Evo\$\textbackslash textbf\{L\}\$ved S\$\textbackslash textbf\{i\}\$gn M\$\textbackslash textbf\{o\}\$me\$\textbackslash textbf\{n\}\$tum\}\$). It is more memory-efficient than Adam as it only keeps track of the momentum. Different from adaptive optimizers, its update has the same magnitude for each parameter calculated through the sign operation. We compare Lion with widely used optimizers, such as Adam and Adafactor, for training a variety of models on different tasks. On image classification, Lion boosts the accuracy of ViT by up to 2\% on ImageNet and saves up to 5x the pre-training compute on JFT. On vision-language contrastive learning, we achieve 88.3\% \$\textbackslash textit\{zero-shot\}\$ and 91.1\% \$\textbackslash textit\{fine-tuning\}\$ accuracy on ImageNet, surpassing the previous best results by 2\% and 0.1\%, respectively. On diffusion models, Lion outperforms Adam by achieving a better FID score and reducing the training compute by up to 2.3x. For autoregressive, masked language modeling, and fine-tuning, Lion exhibits a similar or better performance compared to Adam. Our analysis of Lion reveals that its performance gain grows with the training batch size. It also requires a smaller learning rate than Adam due to the larger norm of the update produced by the sign function. Additionally, we examine the limitations of Lion and identify scenarios where its improvements are small or not statistically significant. Lion is also successfully deployed in production systems such as Google search ads CTR model.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing},
  file = {/Users/Nasy/Zotero/storage/9SM7KL8X/Chen et al. - 2023 - Symbolic Discovery of Optimization Algorithms.pdf;/Users/Nasy/Zotero/storage/CFKR3T2W/2302.html}
}

@inproceedings{chilimbiProjectAdamBuilding2014,
  title = {Project {{Adam}}: {{Building}} an {{Efficient}} and {{Scalable Deep Learning Training System}}},
  booktitle = {11th {{USENIX Symposium}} on {{Operating Systems Design}} and {{Implementation}} ({{OSDI}} 14)},
  author = {Chilimbi, Trishul and Suzue, Yutaka and Apacible, Johnson and Kalyanaraman, Karthik},
  date = {2014-10},
  pages = {571--582},
  publisher = {USENIX Association},
  location = {Broomfield, CO},
  url = {https://www.usenix.org/conference/osdi14/technical-sessions/presentation/chilimbi},
  isbn = {978-1-931971-16-4},
  keywords = {nosource}
}

@article{chiuSearchingYoungStellar2021,
  title = {Searching for Young Stellar Objects through {{SEDs}} by Machine Learning},
  author = {Chiu, Y. -L. and Ho, C. -T. and Wang, D. -W. and Lai, S. -P.},
  date = {2021-07-01},
  journaltitle = {Astronomy and Computing},
  volume = {36},
  pages = {100470},
  issn = {2213-1337},
  doi = {10.1016/j.ascom.2021.100470},
  url = {https://www.sciencedirect.com/science/article/pii/S221313372100024X},
  urldate = {2022-07-29},
  abstract = {Accurate measurements of statistical properties, such as the star formation rate and the lifetime of young stellar objects (YSOs) in different stages, are essential for constraining star formation theories. However, it is a difficult task to separate galaxies and YSOs based on spectral energy distributions (SEDs) alone, because they contain both thermal emission from stars and dust around them and no reliable theories can be applied to distinguish them. Here we compare different machine learning algorithms and develop the Spectrum Classifier of Astronomical Objects (SCAO), based on Fully Connected Neural Network (FCN), to classify regular stars, galaxies, and YSOs. Superior to previous classifiers, SCAO is solely trained by high quality data labeled in Molecular Cores to Planet-forming Disks (c2d) catalog without a priori theoretical knowledge, and provides excellent results with high precision ({$>$}96\%) and recall ({$>$}98\%) for YSOs when only eight bands are included. We systematically investigate the effects of observation errors and distance effects, and show that high accuracy performance is still maintained even when using fluxes of only three bands (IRAC 3, a=IRAC 4, and MIPS 1) in the long wavelengths regime, because the silicate absorption feature is automatically detected by SCAO. Finally, we applied SCAO to Spitzer Enhanced Imaging Products (SEIP), the most complete catalog of Spitzer observations, and found 129219 YSO candidates. The website from SCAO is available at http://scao.astr.nthu.edu.tw.},
  langid = {english},
  keywords = {Deep learning,Neural networks,SED,YSO},
  annotation = {3 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/EWC9MFJA/Chiu et al. - 2021 - Searching for young stellar objects through SEDs b.pdf;/Users/Nasy/Zotero/storage/BC92DAZD/S221313372100024X.html}
}

@online{choiEmpiricalComparisonsOptimizers2020,
  title = {On {{Empirical Comparisons}} of {{Optimizers}} for {{Deep Learning}}},
  author = {Choi, Dami and Shallue, Christopher J. and Nado, Zachary and Lee, Jaehoon and Maddison, Chris J. and Dahl, George E.},
  date = {2020-06-15},
  eprint = {1910.05446},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1910.05446},
  url = {http://arxiv.org/abs/1910.05446},
  abstract = {Selecting an optimizer is a central step in the contemporary deep learning pipeline. In this paper, we demonstrate the sensitivity of optimizer comparisons to the hyperparameter tuning protocol. Our findings suggest that the hyperparameter search space may be the single most important factor explaining the rankings obtained by recent empirical comparisons in the literature. In fact, we show that these results can be contradicted when hyperparameter search spaces are changed. As tuning effort grows without bound, more general optimizers should never underperform the ones they can approximate (i.e., Adam should never perform worse than momentum), but recent attempts to compare optimizers either assume these inclusion relationships are not practically relevant or restrict the hyperparameters in ways that break the inclusions. In our experiments, we find that inclusion relationships between optimizers matter in practice and always predict optimizer comparisons. In particular, we find that the popular adaptive gradient methods never underperform momentum or gradient descent. We also report practical tips around tuning often ignored hyperparameters of adaptive gradient methods and raise concerns about fairly benchmarking optimizers for neural network training.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/PZNQE4WA/Choi et al. - 2020 - On Empirical Comparisons of Optimizers for Deep Le.pdf;/Users/Nasy/Zotero/storage/W6T3WUTK/1910.html}
}

@inproceedings{chopraLearningSimilarityMetric2005,
  title = {Learning a Similarity Metric Discriminatively, with Application to Face Verification},
  booktitle = {2005 {{IEEE Computer Society Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}}'05)},
  author = {Chopra, S. and Hadsell, R. and LeCun, Y.},
  date = {2005-06},
  volume = {1},
  pages = {539-546 vol. 1},
  issn = {1063-6919},
  doi = {10.1109/CVPR.2005.202},
  abstract = {We present a method for training a similarity metric from data. The method can be used for recognition or verification applications where the number of categories is very large and not known during training, and where the number of training samples for a single category is very small. The idea is to learn a function that maps input patterns into a target space such that the L/sub 1/ norm in the target space approximates the "semantic" distance in the input space. The method is applied to a face verification task. The learning process minimizes a discriminative loss function that drives the similarity metric to be small for pairs of faces from the same person, and large for pairs from different persons. The mapping from raw to the target space is a convolutional network whose architecture is designed for robustness to geometric distortions. The system is tested on the Purdue/AR face database which has a very high degree of variability in the pose, lighting, expression, position, and artificial occlusions such as dark glasses and obscuring scarves.},
  eventtitle = {2005 {{IEEE Computer Society Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}}'05)},
  keywords = {Artificial neural networks,Character generation,Drives,Face recognition,Glass,Robustness,Spatial databases,Support vector machine classification,Support vector machines,System testing},
  annotation = {1219 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/SJNIFE7T/1467314.html}
}

@article{choudharySiGMaNetDeepLearning2022,
  title = {{{SiGMa-Net}}: {{Deep}} Learning Network to Distinguish Binary Black Hole Signals from Short-Duration Noise Transients},
  shorttitle = {{{SiGMa-Net}}},
  author = {Choudhary, Sunil and More, Anupreeta and Suyamprakasam, Sudhagar and Bose, Sukanta},
  date = {2022-02-17},
  url = {https://arxiv.org/abs/2202.08671v1},
  urldate = {2022-02-18},
  abstract = {Blip glitches, a type of short-duration noise transient in the LIGO--Virgo data, are a nuisance for the binary black hole (BBH) searches. They affect the BBH search sensitivity significantly because their time-domain morphologies are very similar, and that creates difficulty in vetoing them. In this work, we construct a deep-learning neural network to efficiently distinguish BBH signals from blip glitches. We introduce sine-Gaussian projection (SGP) maps, which are projections of GW frequency-domain data snippets on a basis of sine-Gaussians defined by the quality factor and central frequency. We feed the SGP maps to our deep-learning neural network, which classifies the BBH signals and blips. Whereas the BBH signals are simulated, the blips used are taken from real data throughout our analysis. We show that our network significantly improves the identification of the BBH signals in comparison to the results obtained using traditional-\$\textbackslash chi\textasciicircum 2\$ and sine-Gaussian \$\textbackslash chi\textasciicircum 2\$. For example, our network improves the sensitivity by 75\% at a false-positive rate of \$10\textasciicircum\{-2\}\$ for BBHs with total mass in the range \$[80,140]\textasciitilde M\_\{\textbackslash odot\}\$ and SNR in the range \$[3,8]\$. Also, it correctly identifies 95\% of the real GW events in GWTC-3. The computation time for classification is a few minutes for thousands of SGP maps on a single core. With further optimisation in the next version of our algorithm, we expect a further reduction in the computational cost. Our proposed method can potentially improve the veto process in the LIGO--Virgo GW data analysis and conceivably support identifying GW signals in low-latency pipelines.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/5P24WFYX/Choudhary et al. - 2022 - SiGMa-Net Deep learning network to distinguish bi.pdf;/Users/Nasy/Zotero/storage/KTEA2UK4/2202.html}
}

@article{chuangDebiasedContrastiveLearning2020,
  title = {Debiased Contrastive Learning},
  author = {Chuang, Ching-Yao and Robinson, Joshua and Lin, Yen-Chen and Torralba, Antonio and Jegelka, Stefanie},
  date = {2020},
  journaltitle = {Advances in Neural Information Processing Systems},
  volume = {33},
  file = {/Users/Nasy/Zotero/storage/ZEMMUQ5V/Chuang et al. - Debiased Contrastive Learning.pdf}
}

@article{chuaReducedOrderModelingArtificial2019,
  title = {Reduced-{{Order Modeling}} with {{Artificial Neurons}} for {{Gravitational-Wave Inference}}},
  author = {Chua, Alvin J. K. and Galley, Chad R. and Vallisneri, Michele},
  date = {2019-05-28},
  journaltitle = {Phys. Rev. Lett.},
  volume = {122},
  number = {21},
  pages = {211101},
  publisher = {American Physical Society},
  doi = {10.1103/PhysRevLett.122.211101},
  url = {https://link.aps.org/doi/10.1103/PhysRevLett.122.211101},
  urldate = {2021-10-18},
  abstract = {Gravitational-wave data analysis is rapidly absorbing techniques from deep learning, with a focus on convolutional networks and related methods that treat noisy time series as images. We pursue an alternative approach, in which waveforms are first represented as weighted sums over reduced bases (reduced-order modeling); we then train artificial neural networks to map gravitational-wave source parameters into basis coefficients. Statistical inference proceeds directly in coefficient space, where it is theoretically straightforward and computationally efficient. The neural networks also provide analytic waveform derivatives, which are useful for gradient-based sampling schemes. We demonstrate fast and accurate coefficient interpolation for the case of a four-dimensional binary-inspiral waveform family and discuss promising applications of our framework in parameter estimation.},
  issue = {21},
  annotation = {34 citations (Crossref) [2022-08-03]}
}

@online{cohenGaugeEquivariantConvolutional2019,
  title = {Gauge {{Equivariant Convolutional Networks}} and the {{Icosahedral CNN}}},
  author = {Cohen, Taco S. and Weiler, Maurice and Kicanaoglu, Berkay and Welling, Max},
  date = {2019-05-13},
  eprint = {1902.04615},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/1902.04615},
  urldate = {2022-11-28},
  abstract = {The principle of equivariance to symmetry transformations enables a theoretically grounded approach to neural network architecture design. Equivariant networks have shown excellent performance and data efficiency on vision and medical imaging problems that exhibit symmetries. Here we show how this principle can be extended beyond global symmetries to local gauge transformations. This enables the development of a very general class of convolutional neural networks on manifolds that depend only on the intrinsic geometry, and which includes many popular methods from equivariant and geometric deep learning. We implement gauge equivariant CNNs for signals defined on the surface of the icosahedron, which provides a reasonable approximation of the sphere. By choosing to work with this very regular manifold, we are able to implement the gauge equivariant convolution using a single conv2d call, making it a highly scalable and practical alternative to Spherical CNNs. Using this method, we demonstrate substantial improvements over previous methods on the task of segmenting omnidirectional images and global climate patterns.},
  pubstate = {prepublished},
  file = {/Users/Nasy/Zotero/storage/DI8BL2RG/Cohen et al. - 2019 - Gauge Equivariant Convolutional Networks and the Icosahedral CNN.pdf}
}

@online{cohenGroupEquivariantConvolutional2016,
  title = {Group {{Equivariant Convolutional Networks}}},
  author = {Cohen, Taco S. and Welling, Max},
  date = {2016-06-03},
  eprint = {1602.07576},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1602.07576},
  url = {http://arxiv.org/abs/1602.07576},
  urldate = {2022-11-10},
  abstract = {We introduce Group equivariant Convolutional Neural Networks (G-CNNs), a natural generalization of convolutional neural networks that reduces sample complexity by exploiting symmetries. G-CNNs use G-convolutions, a new type of layer that enjoys a substantially higher degree of weight sharing than regular convolution layers. G-convolutions increase the expressive capacity of the network without increasing the number of parameters. Group convolution layers are easy to use and can be implemented with negligible computational overhead for discrete groups generated by translations, reflections and rotations. G-CNNs achieve state of the art results on CIFAR10 and rotated MNIST.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/LYIR6M49/Cohen and Welling - 2016 - Group Equivariant Convolutional Networks.pdf;/Users/Nasy/Zotero/storage/7TYKAYT5/1602.html}
}

@online{cohenSphericalCNNs2018,
  title = {Spherical {{CNNs}}},
  author = {Cohen, Taco S. and Geiger, Mario and Koehler, Jonas and Welling, Max},
  date = {2018-02-25},
  eprint = {1801.10130},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1801.10130},
  url = {http://arxiv.org/abs/1801.10130},
  urldate = {2022-11-10},
  abstract = {Convolutional Neural Networks (CNNs) have become the method of choice for learning problems involving 2D planar images. However, a number of problems of recent interest have created a demand for models that can analyze spherical images. Examples include omnidirectional vision for drones, robots, and autonomous cars, molecular regression problems, and global weather and climate modelling. A naive application of convolutional networks to a planar projection of the spherical signal is destined to fail, because the space-varying distortions introduced by such a projection will make translational weight sharing ineffective. In this paper we introduce the building blocks for constructing spherical CNNs. We propose a definition for the spherical cross-correlation that is both expressive and rotation-equivariant. The spherical correlation satisfies a generalized Fourier theorem, which allows us to compute it efficiently using a generalized (non-commutative) Fast Fourier Transform (FFT) algorithm. We demonstrate the computational efficiency, numerical accuracy, and effectiveness of spherical CNNs applied to 3D model recognition and atomization energy regression.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/CIYKRBRY/Cohen et al. - 2018 - Spherical CNNs.pdf;/Users/Nasy/Zotero/storage/Z2L4ML98/1801.html}
}

@article{coleTcellReceptorTCRPeptide2014,
  title = {T-Cell {{Receptor}} ({{TCR}})-{{Peptide Specificity Overrides Affinity-enhancing TCR-Major Histocompatibility Complex Interactions}}},
  author = {Cole, David K. and Miles, Kim M. and Madura, Florian and Holland, Christopher J. and Schauenburg, Andrea J. A. and Godkin, Andrew J. and Bulek, Anna M. and Fuller, Anna and Akpovwa, Hephzibah J. E. and Pymm, Phillip G. and Liddy, Nathaniel and Sami, Malkit and Li, Yi and Rizkallah, Pierre J. and Jakobsen, Bent K. and Sewell, Andrew K.},
  date = {2014-01-10},
  journaltitle = {J Biol Chem},
  volume = {289},
  number = {2},
  eprint = {24196962},
  eprinttype = {pmid},
  pages = {628--638},
  issn = {0021-9258},
  doi = {10.1074/jbc.M113.522110},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3887192/},
  urldate = {2022-03-19},
  abstract = {Background: TCR recognition of bipartite ligands composed of self (MHC) and non-self (peptide) maintains T-cell specificity., Results: Mutation of residues in the cognate peptide override TCR mutations that enhance MHC binding., Conclusion: TCR-pMHC binding affinity requires specific TCR-peptide interactions., Significance: Stabilization of TCR-pMHC engagement by TCR-peptide interactions maintains T-cell specificity and prevents recognition of self-pMHC in the periphery., αβ T-cell receptors (TCRs) engage antigens using complementarity-determining region (CDR) loops that are either germ line-encoded (CDR1 and CDR2) or somatically rearranged (CDR3). TCR ligands compose a presentation platform (major histocompatibility complex (MHC)) and a variable antigenic component consisting of a short “foreign” peptide. The sequence of events when the TCR engages its peptide-MHC (pMHC) ligand remains unclear. Some studies suggest that the germ line elements of the TCR engage the MHC prior to peptide scanning, but this order of binding is difficult to reconcile with some TCR-pMHC structures. Here, we used TCRs that exhibited enhanced pMHC binding as a result of mutations in either CDR2 and/or CDR3 loops, that bound to the MHC or peptide, respectively, to dissect the roles of these loops in stabilizing TCR-pMHC interactions. Our data show that TCR-peptide interactions play a strongly dominant energetic role providing a binding mode that is both temporally and energetically complementary with a system requiring positive selection by self-pMHC in the thymus and rapid recognition of non-self-pMHC in the periphery.},
  pmcid = {PMC3887192},
  annotation = {42 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/BW97KZJS/Cole et al. - 2014 - T-cell Receptor (TCR)-Peptide Specificity Override.pdf}
}

@article{croceDeepLearningPredictions2024,
  title = {Deep Learning Predictions of {{TCR-epitope}} Interactions Reveal Epitope-Specific Chains in Dual Alpha {{T}} Cells},
  author = {Croce, Giancarlo and Bobisse, Sara and Moreno, Dana Léa and Schmidt, Julien and Guillame, Philippe and Harari, Alexandre and Gfeller, David},
  date = {2024-04-13},
  journaltitle = {Nat Commun},
  volume = {15},
  number = {1},
  eprint = {38615042},
  eprinttype = {pmid},
  pages = {3211},
  issn = {2041-1723},
  doi = {10.1038/s41467-024-47461-8},
  abstract = {T cells have the ability to eliminate infected and cancer cells and play an essential role in cancer immunotherapy. T cell activation is elicited by the binding of the T cell receptor (TCR) to epitopes displayed on MHC molecules, and the TCR specificity is determined by the sequence of its α and β chains. Here, we collect and curate a dataset of 17,715 αβTCRs interacting with dozens of class I and class II epitopes. We use this curated data to develop MixTCRpred, an epitope-specific TCR-epitope interaction predictor. MixTCRpred accurately predicts TCRs recognizing several viral and cancer epitopes. MixTCRpred further provides a useful quality control tool for multiplexed single-cell TCR sequencing assays of epitope-specific T cells and pinpoints a substantial fraction of putative contaminants in public databases. Analysis of epitope-specific dual α T cells demonstrates that MixTCRpred can identify α chains mediating epitope recognition. Applying MixTCRpred to TCR repertoires from COVID-19 patients reveals enrichment of clonotypes predicted to bind an immunodominant SARS-CoV-2 epitope. Overall, MixTCRpred provides a robust tool to predict TCRs interacting with specific epitopes and interpret TCR-sequencing data from both bulk and epitope-specific T cells.},
  langid = {english},
  pmcid = {PMC11016097},
  keywords = {COVID-19,Deep Learning,Epitopes,Humans,Immunodominant Epitopes,T-Lymphocytes},
  file = {/Users/Nasy/Zotero/storage/CRWTZQTT/Croce et al. - 2024 - Deep learning predictions of TCR-epitope interactions reveal epitope-specific chains in dual alpha T.pdf}
}

@article{cuocoEnhancingGravitationalWaveScience2020,
  title = {Enhancing {{Gravitational-Wave Science}} with {{Machine Learning}}},
  author = {Cuoco, Elena and Powell, Jade and Cavaglià, Marco and Ackley, Kendall and Bejger, Michal and Chatterjee, Chayan and Coughlin, Michael and Coughlin, Scott and Easter, Paul and Essick, Reed and Gabbard, Hunter and Gebhard, Timothy and Ghosh, Shaon and Haegel, Leila and Iess, Alberto and Keitel, David and Marka, Zsuzsa and Marka, Szabolcs and Morawski, Filip and Nguyen, Tri and Ormiston, Rich and Puerrer, Michael and Razzano, Massimiliano and Staats, Kai and Vajente, Gabriele and Williams, Daniel},
  date = {2020-12-04},
  journaltitle = {Mach. Learn.: Sci. Technol.},
  volume = {2},
  number = {1},
  eprint = {2005.03745},
  eprinttype = {arXiv},
  pages = {011002},
  issn = {2632-2153},
  doi = {10.1088/2632-2153/abb93a},
  url = {http://arxiv.org/abs/2005.03745},
  urldate = {2021-03-23},
  abstract = {Machine learning has emerged as a popular and powerful approach for solving problems in astrophysics. We review applications of machine learning techniques for the analysis of ground-based gravitational-wave detector data. Examples include techniques for improving the sensitivity of Advanced LIGO and Advanced Virgo gravitational-wave searches, methods for fast measurements of the astrophysical parameters of gravitational-wave sources, and algorithms for reduction and characterization of non-astrophysical detector noise. These applications demonstrate how machine learning techniques may be harnessed to enhance the science that is possible with current and future gravitational-wave detectors.},
  issue = {1},
  langid = {english},
  keywords = {Astrophysics - High Energy Astrophysical Phenomena,General Relativity and Quantum Cosmology},
  annotation = {48 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/87DNKR3M/Cuoco et al. - 2020 - Enhancing Gravitational-Wave Science with Machine .pdf}
}

@online{daiWhyCanGPT2022,
  title = {Why {{Can GPT Learn In-Context}}? {{Language Models Secretly Perform Gradient Descent}} as {{Meta-Optimizers}}},
  shorttitle = {Why {{Can GPT Learn In-Context}}?},
  author = {Dai, Damai and Sun, Yutao and Dong, Li and Hao, Yaru and Sui, Zhifang and Wei, Furu},
  date = {2022-12-21},
  eprint = {2212.10559},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2212.10559},
  url = {http://arxiv.org/abs/2212.10559},
  abstract = {Large pretrained language models have shown surprising In-Context Learning (ICL) ability. With a few demonstration input-label pairs, they can predict the label for an unseen input without additional parameter updates. Despite the great success in performance, the working mechanism of ICL still remains an open problem. In order to better understand how ICL works, this paper explains language models as meta-optimizers and understands ICL as a kind of implicit finetuning. Theoretically, we figure out that the Transformer attention has a dual form of gradient descent based optimization. On top of it, we understand ICL as follows: GPT first produces meta-gradients according to the demonstration examples, and then these meta-gradients are applied to the original GPT to build an ICL model. Experimentally, we comprehensively compare the behavior of ICL and explicit finetuning based on real tasks to provide empirical evidence that supports our understanding. The results prove that ICL behaves similarly to explicit finetuning at the prediction level, the representation level, and the attention behavior level. Further, inspired by our understanding of meta-optimization, we design a momentum-based attention by analogy with the momentum-based gradient descent algorithm. Its consistently better performance over vanilla attention supports our understanding again from another aspect, and more importantly, it shows the potential to utilize our understanding for future model designing.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computation and Language},
  file = {/Users/Nasy/Zotero/storage/DUKE28US/Dai et al. - 2022 - Why Can GPT Learn In-Context Language Models Secr.pdf;/Users/Nasy/Zotero/storage/2RZNTM66/2212.html}
}

@inproceedings{dallamicoRevisitingBetheHessianImproved2019,
  title = {Revisiting the {{Bethe-Hessian}}: {{Improved Community Detection}} in {{Sparse Heterogeneous Graphs}}},
  shorttitle = {Revisiting the {{Bethe-Hessian}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Dall' Amico, Lorenzo and Couillet, Romain and Tremblay, Nicolas},
  date = {2019},
  volume = {32},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2019/hash/3e6260b81898beacda3d16db379ed329-Abstract.html},
  urldate = {2022-05-26},
  abstract = {Spectral clustering is one of the most popular, yet still incompletely understood, methods for community detection on graphs. This article studies spectral clustering based on the Bethe-Hessian matrix Hr= (r\textasciicircum 2−1)In+D−rA for sparse heterogeneous graphs (following the degree-corrected stochastic block model) in a two-class setting. For a specific value r=ζ, clustering is shown to be insensitive to the degree heterogeneity. We then study the behavior of the informative eigenvector of H\_ζ and, as a result, predict the clustering accuracy. The article concludes with an overview of the generalization to more than two classes along with extensive simulations on synthetic and real networks corroborating our findings.},
  file = {/Users/Nasy/Zotero/storage/6NXVV8I4/Dall' Amico et al. - 2019 - Revisiting the Bethe-Hessian Improved Community D.pdf}
}

@article{dashQuantifiablePredictiveFeatures2017,
  title = {Quantifiable Predictive Features Define Epitope-Specific {{T}} Cell Receptor Repertoires},
  author = {Dash, Pradyot and Fiore-Gartland, Andrew J. and Hertz, Tomer and Wang, George C. and Sharma, Shalini and Souquette, Aisha and Crawford, Jeremy Chase and Clemens, E. Bridie and Nguyen, Thi H. O. and Kedzierska, Katherine and La Gruta, Nicole L. and Bradley, Philip and Thomas, Paul G.},
  date = {2017-07},
  journaltitle = {Nature},
  volume = {547},
  number = {7661},
  pages = {89--93},
  publisher = {Nature Publishing Group},
  issn = {1476-4687},
  doi = {10.1038/nature22383},
  url = {https://www.nature.com/articles/nature22383},
  abstract = {The authors characterize epitope-specific T cell repertoires, identify shared and recognizable features of TCRs, and develop tools to classify antigen specificity on the basis of sequence analysis.},
  issue = {7661},
  langid = {english},
  keywords = {Bioinformatics,VDJ recombination},
  annotation = {444 citations (Crossref) [2022-09-16]},
  file = {/Users/Nasy/Zotero/storage/68GPMTI9/Dash et al. - 2017 - Quantifiable predictive features define epitope-sp.pdf;/Users/Nasy/Zotero/storage/DLBIDI9H/nature22383.html}
}

@inproceedings{deanLargeScaleDistributed2012,
  title = {Large {{Scale Distributed Deep Networks}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Dean, Jeffrey and Corrado, Greg and Monga, Rajat and Chen, Kai and Devin, Matthieu and Mao, Mark and aurelio Ranzato, Marc' and Senior, Andrew and Tucker, Paul and Yang, Ke and Le, Quoc and Ng, Andrew},
  editor = {Pereira, F. and Burges, C. J. C. and Bottou, L. and Weinberger, K. Q.},
  date = {2012},
  volume = {25},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2012/file/6aca97005c68f1206823815f66102863-Paper.pdf},
  keywords = {nosource}
}

@software{DeepLearningTuning2023,
  title = {Deep {{Learning Tuning Playbook}}},
  date = {2023-03-24T03:56:33Z},
  origdate = {2023-01-18T23:32:32Z},
  url = {https://github.com/google-research/tuning_playbook},
  abstract = {A playbook for systematically maximizing the performance of deep learning models.},
  organization = {Google Research}
}

@online{defazioRoadLessScheduled2024,
  title = {The {{Road Less Scheduled}}},
  author = {Defazio, Aaron and Yang, Xingyu Alice and Mehta, Harsh and Mishchenko, Konstantin and Khaled, Ahmed and Cutkosky, Ashok},
  date = {2024-10-29},
  eprint = {2405.15682},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.2405.15682},
  url = {http://arxiv.org/abs/2405.15682},
  urldate = {2024-11-08},
  abstract = {Existing learning rate schedules that do not require specification of the optimization stopping step T are greatly out-performed by learning rate schedules that depend on T. We propose an approach that avoids the need for this stopping time by eschewing the use of schedules entirely, while exhibiting state-of-the-art performance compared to schedules across a wide family of problems ranging from convex problems to large-scale deep learning problems. Our Schedule-Free approach introduces no additional hyper-parameters over standard optimizers with momentum. Our method is a direct consequence of a new theory we develop that unifies scheduling and iterate averaging. An open source implementation of our method is available at https://github.com/facebookresearch/schedule\_free. Schedule-Free AdamW is the core algorithm behind our winning entry to the MLCommons 2024 AlgoPerf Algorithmic Efficiency Challenge Self-Tuning track.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Mathematics - Optimization and Control,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/VSWEFGFH/Defazio et al. - 2024 - The Road Less Scheduled.pdf;/Users/Nasy/Zotero/storage/D423FLQB/2405.html}
}

@article{deneuterFeasibilityMiningCD82018,
  title = {On the Feasibility of Mining {{CD8}}+ {{T}} Cell Receptor Patterns Underlying Immunogenic Peptide Recognition},
  author = {De Neuter, Nicolas and Bittremieux, Wout and Beirnaert, Charlie and Cuypers, Bart and Mrzic, Aida and Moris, Pieter and Suls, Arvid and Van Tendeloo, Viggo and Ogunjimi, Benson and Laukens, Kris and Meysman, Pieter},
  date = {2018-03-01},
  journaltitle = {Immunogenetics},
  volume = {70},
  number = {3},
  pages = {159--168},
  issn = {1432-1211},
  doi = {10.1007/s00251-017-1023-5},
  url = {https://doi.org/10.1007/s00251-017-1023-5},
  abstract = {Current T cell epitope prediction tools are a valuable resource in designing targeted immunogenicity experiments. They typically focus on, and are able to, accurately predict peptide binding and presentation by major histocompatibility complex (MHC) molecules on the surface of antigen-presenting cells. However, recognition of the peptide-MHC complex by a T cell receptor (TCR) is often not included in these tools. We developed a classification approach based on random forest classifiers to predict recognition of a peptide by a T cell receptor and discover patterns that contribute to recognition. We considered two approaches to solve this problem: (1) distinguishing between two sets of TCRs that each bind to a known peptide and (2) retrieving TCRs that bind to a given peptide from a large pool of TCRs. Evaluation of the models on two HIV-1, B*08-restricted epitopes reveals good performance and hints towards structural CDR3 features that can determine peptide immunogenicity. These results are of particular importance as they show that prediction of T cell epitope and T cell epitope recognition based on sequence data is a feasible approach. In addition, the validity of our models not only serves as a proof of concept for the prediction of immunogenic T cell epitopes but also paves the way for more general and high-performing models.},
  langid = {english},
  keywords = {Bioinformatics,Immunoinformatics,Random forest classifier,T cell epitope prediction,T cell receptor},
  annotation = {37 citations (Crossref) [2022-09-16]},
  file = {/Users/Nasy/Zotero/storage/QGX6ZLWI/De Neuter et al. - 2018 - On the feasibility of mining CD8+ T cell receptor .pdf}
}

@article{dissanayakeNeuralnetworkbasedApproximationsSolving1994,
  title = {Neural-Network-Based Approximations for Solving Partial Differential Equations},
  author = {Dissanayake, M. W. M. G. and Phan-Thien, N.},
  date = {1994},
  journaltitle = {Communications in Numerical Methods in Engineering},
  volume = {10},
  number = {3},
  pages = {195--201},
  issn = {1099-0887},
  doi = {10.1002/cnm.1640100303},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/cnm.1640100303},
  abstract = {A numerical method, based on neural-network-based functions, for solving partial differential equations is reported in the paper. Using a ‘universal approximator’ based on a neural network and point collocation, the numerical problem of solving the partial differential equation is transformed to an unconstrained minimization problem. The method is extremely easy to implement and is suitable for obtaining an approximate solution in a short period of time. The technique is illustrated with the aid of two numerical examples.},
  langid = {english},
  annotation = {137 citations (Crossref) [2022-11-04]},
  file = {/Users/Nasy/Zotero/storage/RUWHGICI/Dissanayake and Phan-Thien - 1994 - Neural-network-based approximations for solving pa.pdf;/Users/Nasy/Zotero/storage/XZ9EG6CZ/cnm.html}
}

@online{dongSurveyIncontextLearning2023,
  title = {A {{Survey}} on {{In-context Learning}}},
  author = {Dong, Qingxiu and Li, Lei and Dai, Damai and Zheng, Ce and Wu, Zhiyong and Chang, Baobao and Sun, Xu and Xu, Jingjing and Li, Lei and Sui, Zhifang},
  date = {2023-02-07},
  eprint = {2301.00234},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/2301.00234},
  abstract = {With the increasing ability of large language models (LLMs), in-context learning (ICL) has become a new paradigm for natural language processing (NLP), where LLMs make predictions only based on contexts augmented with a few examples. It has been a new trend to explore ICL to evaluate and extrapolate the ability of LLMs. In this paper, we aim to survey and summarize the progress and challenges of ICL. We first present a formal definition of ICL and clarify its correlation to related studies. Then, we organize and discuss advanced techniques, including training strategies, demonstration designing strategies, as well as related analysis. Finally, we discuss the challenges of ICL and provide potential directions for further research. We hope that our work can encourage more research on uncovering how ICL works and improving ICL.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language},
  file = {/Users/Nasy/Zotero/storage/WLYSRMUD/Dong et al. - 2023 - A Survey on In-context Learning.pdf;/Users/Nasy/Zotero/storage/AFY7KFUB/2301.html}
}

@article{dunnThreeEsCancer2004,
  title = {The {{Three Es}} of {{Cancer Immunoediting}}},
  author = {Dunn, Gavin P. and Old, Lloyd J. and Schreiber, Robert D.},
  date = {2004},
  journaltitle = {Annual Review of Immunology},
  volume = {22},
  number = {1},
  eprint = {15032581},
  eprinttype = {pmid},
  pages = {329--360},
  doi = {10.1146/annurev.immunol.22.012703.104803},
  url = {https://doi.org/10.1146/annurev.immunol.22.012703.104803},
  abstract = {After a century of controversy, the notion that the immune system regulates cancer development is experiencing a new resurgence. An overwhelming amount of data from animal models—together with compelling data from human patients—indicate that a functional cancer immunosurveillance process indeed exists that acts as an extrinsic tumor suppressor. However, it has also become clear that the immune system can facilitate tumor progression, at least in part, by sculpting the immunogenic phenotype of tumors as they develop. The recognition that immunity plays a dual role in the complex interactions between tumors and the host prompted a refinement of the cancer immunosurveillance hypothesis into one termed “cancer immunoediting.” In this review, we summarize the history of the cancer immunosurveillance controversy and discuss its resolution and evolution into the three Es of cancer immunoediting—elimination, equilibrium, and escape.},
  keywords = {nosource},
  annotation = {1840 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/V9HKCDRG/Dunn et al. - 2004 - The three Es of cancer immunoediting.pdf;/Users/Nasy/Zotero/storage/Y693ABEZ/download.html}
}

@article{ehrlichSwarmTCRComputationalApproach2021,
  title = {{{SwarmTCR}}: A Computational Approach to Predict the Specificity of {{T}} Cell Receptors},
  shorttitle = {{{SwarmTCR}}},
  author = {Ehrlich, Ryan and Kamga, Larisa and Gil, Anna and Luzuriaga, Katherine and Selin, Liisa K. and Ghersi, Dario},
  date = {2021-09-07},
  journaltitle = {BMC Bioinformatics},
  volume = {22},
  number = {1},
  pages = {422},
  issn = {1471-2105},
  doi = {10.1186/s12859-021-04335-w},
  url = {https://doi.org/10.1186/s12859-021-04335-w},
  abstract = {With more T cell receptor sequence data becoming available, the need for bioinformatics approaches to predict T cell receptor specificity is even more pressing. Here we present SwarmTCR, a method that uses labeled sequence data to predict the specificity of T cell receptors using a nearest-neighbor approach. SwarmTCR works by optimizing the weights of the individual CDR regions to maximize classification performance.},
  keywords = {Binding specificity,Immunoinformatics,TCR},
  annotation = {2 citations (Crossref) [2022-09-16]},
  file = {/Users/Nasy/Zotero/storage/N3DPS4AK/Ehrlich et al. - 2021 - SwarmTCR a computational approach to predict the .pdf;/Users/Nasy/Zotero/storage/58DWQ94J/s12859-021-04335-w.html}
}

@article{elnaggarProtTransCrackingLanguage2021,
  title = {{{ProtTrans}}: {{Towards Cracking}} the {{Language}} of {{Lifes Code Through Self-Supervised Deep Learning}} and {{High Performance Computing}}},
  shorttitle = {{{ProtTrans}}},
  author = {Elnaggar, Ahmed and Heinzinger, Michael and Dallago, Christian and Rehawi, Ghalia and Wang, Yu and Jones, Llion and Gibbs, Tom and Feher, Tamas and Angerer, Christoph and Steinegger, Martin and Bhowmik, Debsindhu and Rost, Burkhard},
  date = {2021},
  journaltitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  pages = {1--1},
  issn = {1939-3539},
  doi = {10.1109/TPAMI.2021.3095381},
  abstract = {Computational biology and bioinformatics provide vast data gold-mines from protein sequences, ideal for Language Models taken from NLP. These LMs reach for new prediction frontiers at low inference costs. Here, we trained two auto-regressive models (Transformer-XL, XLNet) and four auto-encoder models (BERT, Albert, Electra, T5) on data from UniRef and BFD containing up to 393 billion amino acids. The LMs were trained on the Summit supercomputer using 5616 GPUs and TPU Pod up-to 1024 cores. Dimensionality reduction revealed that the raw protein LM-embeddings from unlabeled data captured some biophysical features of protein sequences. We validated the advantage of using the embeddings as exclusive input for several subsequent tasks. The first was a per-residue prediction of protein secondary structure (3-state accuracy Q3=81\%-87\%); the second were per-protein predictions of protein sub-cellular localization (ten-state accuracy: Q10=81\%) and membrane vs. water soluble (2-state accuracy Q2=91\%). For the per-residue predictions the transfer of the most informative embeddings (ProtT5) for the first time outperformed the state-of-the-art without using evolutionary information thereby bypassing expensive database searches. Taken together, the results implied that protein LMs learned some of the grammar of the language of life. To facilitate future work, we released our models at https://github.com/agemagician/ProtTrans.},
  eventtitle = {{{IEEE Transactions}} on {{Pattern Analysis}} and {{Machine Intelligence}}},
  keywords = {Amino acids,Computational Biology,Computational modeling,Databases,Deep Learning,High Performance Computing,Language Modeling,Machine Learning,Proteins,Task analysis,Three-dimensional displays,Training},
  annotation = {70 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/9SEHRTH7/Elnaggar et al. - 2021 - ProtTrans Towards Cracking the Language of Lifes .pdf;/Users/Nasy/Zotero/storage/K2EIAJCM/9477085.html}
}

@unpublished{ericksonAutoGluonTabularRobustAccurate2020,
  title = {{{AutoGluon-Tabular}}: {{Robust}} and {{Accurate AutoML}} for {{Structured Data}}},
  shorttitle = {{{AutoGluon-Tabular}}},
  author = {Erickson, Nick and Mueller, Jonas and Shirkov, Alexander and Zhang, Hang and Larroy, Pedro and Li, Mu and Smola, Alexander},
  date = {2020-03-13},
  eprint = {2003.06505},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/2003.06505},
  urldate = {2021-05-17},
  abstract = {We introduce AutoGluon-Tabular, an open-source AutoML framework that requires only a single line of Python to train highly accurate machine learning models on an unprocessed tabular dataset such as a CSV file. Unlike existing AutoML frameworks that primarily focus on model/hyperparameter selection, AutoGluon-Tabular succeeds by ensembling multiple models and stacking them in multiple layers. Experiments reveal that our multi-layer combination of many models offers better use of allocated training time than seeking out the best. A second contribution is an extensive evaluation of public and commercial AutoML platforms including TPOT, H2O, AutoWEKA, auto-sklearn, AutoGluon, and Google AutoML Tables. Tests on a suite of 50 classification and regression tasks from Kaggle and the OpenML AutoML Benchmark reveal that AutoGluon is faster, more robust, and much more accurate. We find that AutoGluon often even outperforms the best-in-hindsight combination of all of its competitors. In two popular Kaggle competitions, AutoGluon beat 99\% of the participating data scientists after merely 4h of training on the raw data.},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/CPDED8P2/Erickson et al. - 2020 - AutoGluon-Tabular Robust and Accurate AutoML for .pdf;/Users/Nasy/Zotero/storage/LX5F86CV/2003.html}
}

@article{fangAttentionawareContrastiveLearning2022,
  title = {Attention-Aware Contrastive Learning for Predicting {{T}} Cell Receptor-Antigen Binding Specificity},
  author = {Fang, Yiming and Liu, Xuejun and Liu, Hui},
  date = {2022-11-19},
  journaltitle = {Brief Bioinform},
  volume = {23},
  number = {6},
  eprint = {36094087},
  eprinttype = {pmid},
  pages = {bbac378},
  issn = {1477-4054},
  doi = {10.1093/bib/bbac378},
  abstract = {MOTIVATION: It has been proven that only a small fraction of the neoantigens presented by major histocompatibility complex (MHC) class I molecules on the cell surface can elicit T cells. This restriction can be attributed to the binding specificity of T cell receptor (TCR) and peptide-MHC complex (pMHC). Computational prediction of T cells binding to neoantigens is a challenging and unresolved task. RESULTS: In this paper, we proposed an attention-aware contrastive learning model, ATMTCR, to infer the TCR-pMHC binding specificity. For each TCR sequence, we used a transformer encoder to transform it to latent representation, and then masked a percentage of amino acids guided by attention weights to generate its contrastive view. Compared to fully-supervised baseline model, we verified that contrastive learning-based pretraining on large-scale TCR sequences significantly improved the prediction performance of downstream tasks. Interestingly, masking a percentage of amino acids with low attention weights yielded best performance compared to other masking strategies. Comparison experiments on two independent datasets demonstrated our method achieved better performance than other existing algorithms. Moreover, we identified important amino acids and their positional preference through attention weights, which indicated the potential interpretability of our proposed model.},
  langid = {english},
  keywords = {Amino Acids,Attention,attention mechanism,contrastive learning,Histocompatibility Antigens Class I,HLA Antigens,neoantigen,Protein Binding,Receptors Antigen T-Cell,T cell receptor,T-Lymphocytes,TCR–antigen binding},
  file = {/Users/Nasy/Zotero/storage/SJJCPZWZ/Fang et al. - 2022 - Attention-aware contrastive learning for predicting T cell receptor-antigen binding specificity.pdf}
}

@inproceedings{fanMetapathguidedHeterogeneousGraph2019,
  title = {Metapath-Guided {{Heterogeneous Graph Neural Network}} for {{Intent Recommendation}}},
  booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Fan, Shaohua and Zhu, Junxiong and Han, Xiaotian and Shi, Chuan and Hu, Linmei and Ma, Biyu and Li, Yongliang},
  date = {2019-07-25},
  series = {{{KDD}} '19},
  pages = {2478--2486},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3292500.3330673},
  url = {https://doi.org/10.1145/3292500.3330673},
  urldate = {2022-06-06},
  abstract = {With the prevalence of mobile e-commerce nowadays, a new type of recommendation services, called intent recommendation, is widely used in many mobile e-commerce Apps, such as Taobao and Amazon. Different from traditional query recommendation and item recommendation, intent recommendation is to automatically recommend user intent according to user historical behaviors without any input when users open the App. Intent recommendation becomes very popular in the past two years, because of revealing user latent intents and avoiding tedious input in mobile phones. Existing methods used in industry usually need laboring feature engineering. Moreover, they only utilize attribute and statistic information of users and queries, and fail to take full advantage of rich interaction information in intent recommendation, which may result in limited performances. In this paper, we propose to model the complex objects and rich interactions in intent recommendation as a Heterogeneous Information Network. Furthermore, we present a novel M etapath-guided E mbedding method for I ntent Rec ommendation\textasciitilde (called MEIRec). In order to fully utilize rich structural information, we design a metapath-guided heterogeneous Graph Neural Network to learn the embeddings of objects in intent recommendation. In addition, in order to alleviate huge learning parameters in embeddings, we propose a uniform term embedding mechanism, in which embeddings of objects are made up with the same term embedding space. Offline experiments on real large-scale data show the superior performance of the proposed MEIRec, compared to representative methods.Moreover, the results of online experiments on Taobao e-commerce platform show that MEIRec not only gains a performance improvement of 1.54\% on CTR metric, but also attracts up to 2.66\% of new users to search queries.},
  isbn = {978-1-4503-6201-6},
  keywords = {graph neural network,heterogeneous information network,intent recommendation,recommender systems},
  annotation = {77 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/UJDPNN3W/Fan et al. - 2019 - Metapath-guided Heterogeneous Graph Neural Network.pdf}
}

@article{fengSlidingattentionTransformerNeural2024,
  title = {Sliding-Attention Transformer Neural Architecture for Predicting {{T}} Cell Receptor–Antigen–Human Leucocyte Antigen Binding},
  author = {Feng, Ziyan and Chen, Jingyang and Hai, Youlong and Pang, Xuelian and Zheng, Kun and Xie, Chenglong and Zhang, Xiujuan and Li, Shengqing and Zhang, Chengjuan and Liu, Kangdong and Zhu, Lili and Hu, Xiaoyong and Li, Shiliang and Zhang, Jie and Zhang, Kai and Li, Honglin},
  date = {2024-10},
  journaltitle = {Nat Mach Intell},
  volume = {6},
  number = {10},
  pages = {1216--1230},
  publisher = {Nature Publishing Group},
  issn = {2522-5839},
  doi = {10.1038/s42256-024-00901-y},
  url = {https://www.nature.com/articles/s42256-024-00901-y},
  urldate = {2024-10-30},
  abstract = {Neoantigens are promising targets for immunotherapy by eliciting immune response and removing cancer cells with high specificity, low toxicity and ease of personalization. However, identifying effective neoantigens remains difficult because of the complex interactions among T cell receptors, antigens and human leucocyte antigen sequences. In this study, we integrate important physical and biological priors with the Transformer model and propose the physics-inspired sliding transformer (PISTE). In PISTE, the conventional, data-driven attention mechanism is replaced with physics-driven dynamics that steers the positioning of amino acid residues along the gradient field of their interactions. This allows navigating the intricate landscape of biosequence interactions intelligently, leading to improved accuracy in T cell receptor–antigen–human leucocyte antigen binding prediction and robust generalization to rare sequences. Furthermore, PISTE effectively recovers residue-level contact relationships even in the absence of three-dimensional structure training data. We applied PISTE in a multitude of immunogenic tumour types to pinpoint neoantigens and discern neoantigen-reactive T cells. In a prospective study of prostate cancer, 75\% of the patients elicited immune responses through PISTE-predicted neoantigens.},
  langid = {english},
  keywords = {Computational biology and bioinformatics,Immunology},
  file = {/Users/Nasy/Zotero/storage/B43RAZVL/Feng et al. - 2024 - Sliding-attention transformer neural architecture for predicting T cell receptor–antigen–human leuco.pdf}
}

@article{fengSlidingattentionTransformerNeural2024a,
  title = {Sliding-Attention Transformer Neural Architecture for Predicting {{T}} Cell Receptor–Antigen–Human Leucocyte Antigen Binding},
  author = {Feng, Ziyan and Chen, Jingyang and Hai, Youlong and Pang, Xuelian and Zheng, Kun and Xie, Chenglong and Zhang, Xiujuan and Li, Shengqing and Zhang, Chengjuan and Liu, Kangdong and Zhu, Lili and Hu, Xiaoyong and Li, Shiliang and Zhang, Jie and Zhang, Kai and Li, Honglin},
  date = {2024-10},
  journaltitle = {Nat Mach Intell},
  volume = {6},
  number = {10},
  pages = {1216--1230},
  publisher = {Nature Publishing Group},
  issn = {2522-5839},
  doi = {10.1038/s42256-024-00901-y},
  url = {https://www.nature.com/articles/s42256-024-00901-y},
  urldate = {2024-10-30},
  abstract = {Neoantigens are promising targets for immunotherapy by eliciting immune response and removing cancer cells with high specificity, low toxicity and ease of personalization. However, identifying effective neoantigens remains difficult because of the complex interactions among T cell receptors, antigens and human leucocyte antigen sequences. In this study, we integrate important physical and biological priors with the Transformer model and propose the physics-inspired sliding transformer (PISTE). In PISTE, the conventional, data-driven attention mechanism is replaced with physics-driven dynamics that steers the positioning of amino acid residues along the gradient field of their interactions. This allows navigating the intricate landscape of biosequence interactions intelligently, leading to improved accuracy in T cell receptor–antigen–human leucocyte antigen binding prediction and robust generalization to rare sequences. Furthermore, PISTE effectively recovers residue-level contact relationships even in the absence of three-dimensional structure training data. We applied PISTE in a multitude of immunogenic tumour types to pinpoint neoantigens and discern neoantigen-reactive T cells. In a prospective study of prostate cancer, 75\% of the patients elicited immune responses through PISTE-predicted neoantigens.},
  langid = {english},
  keywords = {Computational biology and bioinformatics,Immunology},
  file = {/Users/Nasy/Zotero/storage/BXNWESEF/Feng et al. - 2024 - Sliding-attention transformer neural architecture for predicting T cell receptor–antigen–human leuco.pdf}
}

@online{fokSpontaneousSymmetryBreaking2017,
  title = {Spontaneous {{Symmetry Breaking}} in {{Neural Networks}}},
  author = {Fok, Ricky and An, Aijun and Wang, Xiaogang},
  date = {2017-10-17},
  eprint = {1710.06096},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1710.06096},
  url = {http://arxiv.org/abs/1710.06096},
  urldate = {2022-11-17},
  abstract = {We propose a framework to understand the unprecedented performance and robustness of deep neural networks using field theory. Correlations between the weights within the same layer can be described by symmetries in that layer, and networks generalize better if such symmetries are broken to reduce the redundancies of the weights. Using a two parameter field theory, we find that the network can break such symmetries itself towards the end of training in a process commonly known in physics as spontaneous symmetry breaking. This corresponds to a network generalizing itself without any user input layers to break the symmetry, but by communication with adjacent layers. In the layer decoupling limit applicable to residual networks (He et al., 2015), we show that the remnant symmetries that survive the non-linear layers are spontaneously broken. The Lagrangian for the non-linear and weight layers together has striking similarities with the one in quantum field theory of a scalar. Using results from quantum field theory we show that our framework is able to explain many experimentally observed phenomena,such as training on random labels with zero error (Zhang et al., 2017), the information bottleneck, the phase transition out of it and gradient variance explosion (Shwartz-Ziv \& Tishby, 2017), shattered gradients (Balduzzi et al., 2017), and many more.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Statistics - Computation},
  file = {/Users/Nasy/Zotero/storage/KZ7Z882G/Fok et al. - 2017 - Spontaneous Symmetry Breaking in Neural Networks.pdf;/Users/Nasy/Zotero/storage/8S8VLYBZ/1710.html}
}

@online{fuMMEComprehensiveEvaluation2023,
  title = {{{MME}}: {{A Comprehensive Evaluation Benchmark}} for {{Multimodal Large Language Models}}},
  shorttitle = {{{MME}}},
  author = {Fu, Chaoyou and Chen, Peixian and Shen, Yunhang and Qin, Yulei and Zhang, Mengdan and Lin, Xu and Qiu, Zhenyu and Lin, Wei and Yang, Jinrui and Zheng, Xiawu and Li, Ke and Sun, Xing and Ji, Rongrong},
  date = {2023-07-01},
  eprint = {2306.13394},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/2306.13394},
  urldate = {2023-07-16},
  abstract = {Multimodal Large Language Model (MLLM) relies on the powerful LLM to perform multimodal tasks, showing amazing emergent abilities in recent studies, such as writing poems based on an image. However, it is difficult for these case studies to fully reflect the performance of MLLM, lacking a comprehensive evaluation. In this paper, we fill in this blank, presenting the first MLLM Evaluation benchmark MME. It measures both perception and cognition abilities on a total of 14 subtasks. In order to avoid data leakage that may arise from direct use of public datasets for evaluation, the annotations of instruction-answer pairs are all manually designed. The concise instruction design allows us to fairly compare MLLMs, instead of struggling in prompt engineering. Besides, with such an instruction, we can also easily carry out quantitative statistics. A total of 12 advanced MLLMs are comprehensively evaluated on our MME, which not only suggests that existing MLLMs still have a large room for improvement, but also reveals the potential directions for the subsequent model optimization.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computer Vision and Pattern Recognition},
  file = {/Users/Nasy/Zotero/storage/BCZRBF5L/Fu et al. - 2023 - MME A Comprehensive Evaluation Benchmark for Mult.pdf;/Users/Nasy/Zotero/storage/L92NZB97/2306.html}
}

@article{gabbardMatchingMatchedFiltering2018,
  title = {Matching {{Matched Filtering}} with {{Deep Networks}} for {{Gravitational-Wave Astronomy}}},
  author = {Gabbard, Hunter and Williams, Michael and Hayes, Fergus and Messenger, Chris},
  date = {2018-04-06},
  journaltitle = {Phys. Rev. Lett.},
  volume = {120},
  number = {14},
  pages = {141103},
  issn = {0031-9007, 1079-7114},
  doi = {10.1103/PhysRevLett.120.141103},
  url = {https://link.aps.org/doi/10.1103/PhysRevLett.120.141103},
  urldate = {2021-03-23},
  issue = {14},
  langid = {english},
  annotation = {94 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/Q2L43KXW/Gabbard et al. - 2018 - Matching Matched Filtering with Deep Networks for .pdf}
}

@inproceedings{ganeaHyperbolicNeuralNetworks2018,
  title = {Hyperbolic {{Neural Networks}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Ganea, Octavian and Becigneul, Gary and Hofmann, Thomas},
  date = {2018},
  volume = {31},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2018/hash/dbab2adc8f9d078009ee3fa810bea142-Abstract.html},
  urldate = {2023-03-09},
  abstract = {Hyperbolic spaces have recently gained momentum in the context of machine learning due to their high capacity and tree-likeliness properties. However, the representational power of hyperbolic geometry is not yet on par with Euclidean geometry, firstly because of the absence of corresponding hyperbolic neural network layers. Here, we bridge this gap in a principled manner by combining the formalism of Möbius gyrovector spaces with the Riemannian geometry of the Poincaré model of hyperbolic spaces. As a result, we derive hyperbolic versions of important deep learning tools: multinomial logistic regression, feed-forward and recurrent neural networks. This allows to embed sequential data and perform classification in the hyperbolic space. Empirically, we show that, even if hyperbolic optimization tools are limited, hyperbolic sentence embeddings either outperform or are on par with their Euclidean variants on textual entailment and noisy-prefix recognition tasks.},
  keywords = {⛔ No DOI found},
  file = {/Users/Nasy/Zotero/storage/8CW637RN/Ganea et al. - 2018 - Hyperbolic Neural Networks.pdf}
}

@article{gaoPanPeptideMetaLearning2023,
  title = {Pan-{{Peptide Meta Learning}} for {{T-cell}} Receptor–Antigen Binding Recognition},
  author = {Gao, Yicheng and Gao, Yuli and Fan, Yuxiao and Zhu, Chengyu and Wei, Zhiting and Zhou, Chi and Chuai, Guohui and Chen, Qinchang and Zhang, He and Liu, Qi},
  date = {2023-03},
  journaltitle = {Nat Mach Intell},
  volume = {5},
  number = {3},
  pages = {236--249},
  publisher = {Nature Publishing Group},
  issn = {2522-5839},
  doi = {10.1038/s42256-023-00619-3},
  url = {https://www.nature.com/articles/s42256-023-00619-3},
  urldate = {2024-10-30},
  abstract = {The identification of the mechanisms by which T-cell receptors (TCRs) interact with human antigens provides a crucial opportunity to develop new vaccines, diagnostics and immunotherapies. However, the accurate prediction and recognition of TCR–antigen pairing represents a substantial computational challenge in immunology. Existing tools only learn the binding patterns of antigens from many known TCR binding repertoires and fail to recognize antigens that have never been presented to the immune system or for which only a few TCR binding repertoires are known. However, the binding specificity for neoantigens or exogenous peptides is crucial for immune studies and immunotherapy. Therefore, we developed Pan-Peptide Meta Learning (PanPep), a general and robust framework to recognize TCR–antigen binding, by combining the concepts of meta-learning and the neural Turing machine. The neural Turing machine adds external memory to avoid forgetting previously learned tasks, which is used here to accurately predict TCR binding specificity with any peptide, particularly unseen ones. We applied PanPep to various challenging clinical tasks, including (1) qualitatively measuring the clonal expansion of T cells; (2) efficiently sorting responsive T cells in tumour neoantigen therapy; and (3) accurately identifying immune-responsive TCRs in a large cohort from a COVID-19 study. Our comprehensive tests show that PanPep outperforms existing tools. PanPep also offers interpretability, revealing the nature of peptide and TCR interactions in 3D crystal structures. We believe PanPep can be a useful tool to decipher TCR–antigen interactions and that it has broad clinical applications.},
  langid = {english},
  keywords = {Computational models,Machine learning},
  file = {/Users/Nasy/Zotero/storage/BL8USKMP/Gao et al. - 2023 - Pan-Peptide Meta Learning for T-cell receptor–antigen binding recognition.pdf}
}

@unpublished{geDeepMetricLearning2018,
  title = {Deep {{Metric Learning}} with {{Hierarchical Triplet Loss}}},
  author = {Ge, Weifeng and Huang, Weilin and Dong, Dengke and Scott, Matthew R.},
  date = {2018-10-16},
  eprint = {1810.06951},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/1810.06951},
  urldate = {2022-05-10},
  abstract = {We present a novel hierarchical triplet loss (HTL) capable of automatically collecting informative training samples (triplets) via a defined hierarchical tree that encodes global context information. This allows us to cope with the main limitation of random sampling in training a conventional triplet loss, which is a central issue for deep metric learning. Our main contributions are two-fold. (i) we construct a hierarchical class-level tree where neighboring classes are merged recursively. The hierarchical structure naturally captures the intrinsic data distribution over the whole database. (ii) we formulate the problem of triplet collection by introducing a new violate margin, which is computed dynamically based on the designed hierarchical tree. This allows it to automatically select meaningful hard samples with the guide of global context. It encourages the model to learn more discriminative features from visual similar classes, leading to faster convergence and better performance. Our method is evaluated on the tasks of image retrieval and face recognition, where it outperforms the standard triplet loss substantially by 1\%-18\%. It achieves new state-of-the-art performance on a number of benchmarks, with much fewer learning iterations.},
  keywords = {Computer Science - Computer Vision and Pattern Recognition},
  file = {/Users/Nasy/Zotero/storage/82G6P4GX/Ge et al. - 2018 - Deep Metric Learning with Hierarchical Triplet Los.pdf;/Users/Nasy/Zotero/storage/3WHPLGUZ/1810.html}
}

@article{geeAntigenIdentificationOrphan2018,
  title = {Antigen {{Identification}} for {{Orphan T Cell Receptors Expressed}} on {{Tumor-Infiltrating Lymphocytes}}},
  author = {Gee, Marvin H. and Han, Arnold and Lofgren, Shane M. and Beausang, John F. and Mendoza, Juan L. and Birnbaum, Michael E. and Bethune, Michael T. and Fischer, Suzanne and Yang, Xinbo and Gomez-Eerland, Raquel and Bingham, David B. and Sibener, Leah V. and Fernandes, Ricardo A. and Velasco, Andrew and Baltimore, David and Schumacher, Ton N. and Khatri, Purvesh and Quake, Stephen R. and Davis, Mark M. and Garcia, K. Christopher},
  date = {2018-01-25},
  journaltitle = {Cell},
  volume = {172},
  number = {3},
  eprint = {29275860},
  eprinttype = {pmid},
  pages = {549-563.e16},
  issn = {1097-4172},
  doi = {10.1016/j.cell.2017.11.043},
  abstract = {The immune system can mount T~cell responses against tumors; however, the antigen specificities of tumor-infiltrating lymphocytes (TILs) are not well understood. We used yeast-display libraries of peptide-human leukocyte antigen (pHLA) to screen for antigens of "orphan" T~cell receptors (TCRs) expressed on TILs from human colorectal adenocarcinoma. Four TIL-derived TCRs exhibited strong selection for peptides presented in a highly diverse pHLA-A∗02:01 library. Three of the TIL TCRs were specific for non-mutated self-antigens, two of which were present in separate patient tumors, and shared specificity for a non-mutated self-antigen derived from U2AF2. These results show that the exposed recognition surface of MHC-bound peptides accessible to the TCR contains sufficient structural information to enable the reconstruction of sequences of peptide targets for pathogenic TCRs of unknown specificity. This finding underscores the surprising specificity of TCRs for their cognate antigens and enables the facile indentification of tumor antigens through unbiased screening.},
  langid = {english},
  pmcid = {PMC5786495},
  keywords = {Adenocarcinoma,Aged,Animals,antigens,Antigens Neoplasm,cancer,Cell Line Tumor,Cells Cultured,Colorectal Neoplasms,combinatorial biology,HEK293 Cells,HLA-A Antigens,human leukocyte antigen,Humans,ligand identification,Lymphocytes Tumor-Infiltrating,Male,Middle Aged,peptide library,Peptide Library,peptides,Receptors Antigen T-Cell,Sf9 Cells,single-cell sequencing,Spodoptera,T cell,T cell receptor},
  annotation = {174 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/82Z93MZP/Gee et al. - 2018 - Antigen Identification for Orphan T Cell Receptors.pdf}
}

@online{GeminiNovelUniversal,
  title = {Gemini: {{A Novel}} and {{Universal Heterogeneous Graph Information Fusing Framework}} for {{Online Recommendations}} | {{Proceedings}} of the 26th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  url = {https://dl.acm.org/doi/abs/10.1145/3394486.3403388},
  urldate = {2022-06-05},
  file = {/Users/Nasy/Zotero/storage/T8KPRABY/3394486.html}
}

@article{genhedenPaRoutesFrameworkBenchmarking2022,
  title = {{{PaRoutes}}: Towards a Framework for Benchmarking Retrosynthesis Route Predictions},
  shorttitle = {{{PaRoutes}}},
  author = {Genheden, Samuel and Bjerrum, Esben},
  date = {2022-08-08},
  journaltitle = {Digital Discovery},
  volume = {1},
  number = {4},
  pages = {527--539},
  publisher = {RSC},
  issn = {2635-098X},
  doi = {10.1039/D2DD00015F},
  url = {https://pubs.rsc.org/en/content/articlelanding/2022/dd/d2dd00015f},
  urldate = {2024-07-11},
  abstract = {We introduce a framework for benchmarking multi-step retrosynthesis methods, i.e. route predictions, called PaRoutes. The framework consists of two sets of 10 000 synthetic routes extracted from the patent literature, a list of stock compounds, and a curated set of reactions on which one-step retrosynthesis models can be trained. PaRoutes also comes with scripts to compute route quality and route diversity, quantities that are important for comparing methods. As an illustration of the framework, we compare three template-based methods implemented in the AiZynthFinder software: Monte Carlo tree search (MCTS), Retro*, and a depth-first proof-number search (DFPN) algorithm. It is found that DFPN is inferior to both MCTS and Retro* and cannot be recommended in its current implementation. MCTS and Retro* are on a par with regard to search speed and the ability to find routes in which all starting material is in stock. However, MCTS outperforms Retro* when it comes to route quality and route diversity. MCTS more easily recovers the reference routes and tends to find a diverse set of solutions for a greater portion of the targets. Having showcased the benchmark for template-based methods, we discuss potential issues and caveats needed when adapting the framework for other methods,.e.g., template-free methods or expert systems. We will continue to update and expand the application of PaRoutes, and we also encourage practitioners and developers to adapt PaRoutes to their algorithms as we envisage that the framework could become the community standard to compare retrosynthesis route predictions. PaRoutes is available at https://github.com/MolecularAI/PaRoutes.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/FFVYCM95/Genheden and Bjerrum - 2022 - PaRoutes towards a framework for benchmarking retrosynthesis route predictions.pdf;/Users/Nasy/Zotero/storage/R3YGQ4G4/Genheden and Bjerrum - 2022 - PaRoutes towards a framework for benchmarking retrosynthesis route predictions.pdf}
}

@article{georgeDeepLearningRealtime2018,
  title = {Deep {{Learning}} for {{Real-time Gravitational Wave Detection}} and {{Parameter Estimation}}: {{Results}} with {{Advanced LIGO Data}}},
  shorttitle = {Deep {{Learning}} for {{Real-time Gravitational Wave Detection}} and {{Parameter Estimation}}},
  author = {George, Daniel and Huerta, E. A.},
  date = {2018-03},
  journaltitle = {Physics Letters B},
  volume = {778},
  eprint = {1711.03121},
  eprinttype = {arXiv},
  pages = {64--70},
  issn = {03702693},
  doi = {10.1016/j.physletb.2017.12.053},
  url = {http://arxiv.org/abs/1711.03121},
  urldate = {2021-03-23},
  abstract = {The recent Nobel-prize-winning detections of gravitational waves from merging black holes and the subsequent detection of the collision of two neutron stars in coincidence with electromagnetic observations have inaugurated a new era of multimessenger astrophysics. To enhance the scope of this emergent field of science, we pioneered the use of deep learning with convolutional neural networks, that take time-series inputs, for rapid detection and characterization of gravitational wave signals. This approach, Deep Filtering, was initially demonstrated using simulated LIGO noise. In this article, we present the extension of Deep Filtering using real data from LIGO, for both detection and parameter estimation of gravitational waves from binary black hole mergers using continuous data streams from multiple LIGO detectors. We demonstrate for the first time that machine learning can detect and estimate the true parameters of real events observed by LIGO. Our results show that Deep Filtering achieves similar sensitivities and lower errors compared to matched-filtering while being far more computationally efficient and more resilient to glitches, allowing real-time processing of weak time-series signals in non-stationary non-Gaussian noise with minimal resources, and also enables the detection of new classes of gravitational wave sources that may go unnoticed with existing detection algorithms. This unified framework for data analysis is ideally suited to enable coincident detection campaigns of gravitational waves and their multimessenger counterparts in real-time.},
  langid = {english},
  keywords = {Astrophysics - High Energy Astrophysical Phenomena,Astrophysics - Instrumentation and Methods for Astrophysics,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,General Relativity and Quantum Cosmology},
  annotation = {170 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/VR5UWFVI/George and Huerta - 2018 - Deep Learning for Real-time Gravitational Wave Det.pdf}
}

@article{georgeDeepNeuralNetworks2018,
  title = {Deep Neural Networks to Enable Real-Time Multimessenger Astrophysics},
  author = {George, Daniel and Huerta, E. A.},
  date = {2018-02-26},
  journaltitle = {Phys. Rev. D},
  volume = {97},
  number = {4},
  pages = {044039},
  issn = {2470-0010, 2470-0029},
  doi = {10.1103/PhysRevD.97.044039},
  url = {https://link.aps.org/doi/10.1103/PhysRevD.97.044039},
  urldate = {2021-03-23},
  issue = {4},
  langid = {english},
  annotation = {119 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/AR6BXEWJ/George and Huerta - 2018 - Deep neural networks to enable real-time multimess.pdf}
}

@article{gielisDetectionEnrichedCell2019,
  title = {Detection of {{Enriched T Cell Epitope Specificity}} in {{Full T Cell Receptor Sequence Repertoires}}},
  author = {Gielis, Sofie and Moris, Pieter and Bittremieux, Wout and De Neuter, Nicolas and Ogunjimi, Benson and Laukens, Kris and Meysman, Pieter},
  date = {2019},
  journaltitle = {Frontiers in Immunology},
  volume = {10},
  issn = {1664-3224},
  doi = {10.3389/fimmu.2019.02820},
  url = {https://www.frontiersin.org/articles/10.3389/fimmu.2019.02820},
  abstract = {High-throughput T cell receptor (TCR) sequencing allows the characterization of an individual's TCR repertoire and directly queries their immune state. However, it remains a non-trivial task to couple these sequenced TCRs to their antigenic targets. In this paper, we present a novel strategy to annotate full TCR sequence repertoires with their epitope specificities. The strategy is based on a machine learning algorithm to learn the TCR patterns common to the recognition of a specific epitope. These results are then combined with a statistical analysis to evaluate the occurrence of specific epitope-reactive TCR sequences per epitope in repertoire data. In this manner, we can directly study the capacity of full TCR repertoires to target specific epitopes of the relevant vaccines or pathogens. We demonstrate the usability of this approach on three independent datasets related to vaccine monitoring and infectious disease diagnostics by independently identifying the epitopes that are targeted by the TCR repertoire. The developed method is freely available as a web tool for academic use at tcrex.biodatamining.be.},
  file = {/Users/Nasy/Zotero/storage/CLIB8YPT/Gielis et al. - 2019 - Detection of Enriched T Cell Epitope Specificity i.pdf}
}

@inproceedings{gilmerNeuralMessagePassing2017,
  title = {Neural Message Passing for {{Quantum}} Chemistry},
  booktitle = {Proceedings of the 34th {{International Conference}} on {{Machine Learning}} - {{Volume}} 70},
  author = {Gilmer, Justin and Schoenholz, Samuel S. and Riley, Patrick F. and Vinyals, Oriol and Dahl, George E.},
  date = {2017-08-06},
  series = {{{ICML}}'17},
  pages = {1263--1272},
  publisher = {JMLR.org},
  location = {Sydney, NSW, Australia},
  abstract = {Supervised learning on molecules has incredible potential to be useful in chemistry, drug discovery, and materials science. Luckily, several promising and closely related neural network models invariant to molecular symmetries have already been described in the literature. These models learn a message passing algorithm and aggregation procedure to compute a function of their entire input graph. At this point, the next step is to find a particularly effective variant of this general approach and apply it to chemical prediction benchmarks until we either solve them or reach the limits of the approach. In this paper, we reformulate existing models into a single common framework we call Message Passing Neural Networks (MPNNs) and explore additional novel variations within this framework. Using MPNNs we demonstrate state of the art results on an important molecular property prediction benchmark; these results are strong enough that we believe future work should focus on datasets with larger molecules or more accurate ground truth labels.},
  file = {/Users/Nasy/Zotero/storage/MXHW8M76/Gilmer et al. - 2017 - Neural message passing for Quantum chemistry.pdf}
}

@article{glanvilleIdentifyingSpecificityGroups2017,
  title = {Identifying Specificity Groups in the {{T}} Cell Receptor Repertoire},
  author = {Glanville, Jacob and Huang, Huang and Nau, Allison and Hatton, Olivia and Wagar, Lisa E. and Rubelt, Florian and Ji, Xuhuai and Han, Arnold and Krams, Sheri M. and Pettus, Christina and Haas, Nikhil and Arlehamn, Cecilia S. Lindestam and Sette, Alessandro and Boyd, Scott D. and Scriba, Thomas J. and Martinez, Olivia M. and Davis, Mark M.},
  date = {2017-07},
  journaltitle = {Nature},
  volume = {547},
  number = {7661},
  pages = {94--98},
  publisher = {Nature Publishing Group},
  issn = {1476-4687},
  doi = {10.1038/nature22976},
  url = {https://www.nature.com/articles/nature22976},
  urldate = {2022-03-19},
  abstract = {The authors devise an algorithm that can cluster T cell receptor (TCR) sequences sharing the same specificity, predict the HLA restriction of these TCR clusters on the basis of subjects’ genotypes and help to identify specific peptide major histocompatibility complex ligands.},
  issue = {7661},
  langid = {english},
  keywords = {Adaptive immunity,Protein sequence analyses,Statistical methods,Systems analysis,T-cell receptor},
  annotation = {541 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/4XL2F2S4/Glanville et al. - 2017 - Identifying specificity groups in the T cell recep.pdf;/Users/Nasy/Zotero/storage/AALKFNIY/nature22976.html}
}

@inproceedings{glorotUnderstandingDifficultyTraining2010,
  title = {Understanding the Difficulty of Training Deep Feedforward Neural Networks},
  booktitle = {Proceedings of the {{Thirteenth International Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
  author = {Glorot, Xavier and Bengio, Yoshua},
  date = {2010-03-31},
  pages = {249--256},
  publisher = {{JMLR Workshop and Conference Proceedings}},
  issn = {1938-7228},
  url = {https://proceedings.mlr.press/v9/glorot10a.html},
  urldate = {2023-03-10},
  abstract = {Whereas before 2006 it appears that deep multi-layer neural networks were not successfully trained, since then several algorithms have been shown to successfully train them, with experimental results showing the superiority of deeper vs less deep architectures. All these experimental results were obtained with new initialization or training mechanisms. Our objective here is to understand better why standard gradient descent from random initialization is doing so poorly with deep neural networks, to better understand these recent relative successes and help design better algorithms in the future.  We first observe the influence of the non-linear activations functions. We find that the logistic sigmoid activation is unsuited for deep networks with random initialization because of its mean value, which can drive especially the top hidden layer into saturation. Surprisingly, we find that saturated units can move out of saturation by themselves, albeit slowly, and explaining the plateaus sometimes seen when training neural networks. We find that a new non-linearity that saturates less can often be beneficial. Finally, we study how activations and gradients vary across layers and during training, with the idea that training may be more difficult when the singular values of the Jacobian associated with each layer are far from 1.  Based on these considerations, we propose a new initialization scheme that brings substantially faster convergence.},
  eventtitle = {Proceedings of the {{Thirteenth International Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/C4Y83VVY/Glorot and Bengio - 2010 - Understanding the difficulty of training deep feed.pdf}
}

@online{goffinetMATEPredMultimodalAttentionbased2024,
  title = {{{MATE-Pred}}: {{Multimodal Attention-based TCR-Epitope}} Interaction {{Predictor}}},
  shorttitle = {{{MATE-Pred}}},
  author = {Goffinet, Etienne and Mall, Raghvendra and Singh, Ankita and Kaushik, Rahul and Castiglione, Filippo},
  date = {2024-01-08},
  eprinttype = {bioRxiv},
  eprintclass = {New Results},
  pages = {2024.01.07.574567},
  doi = {10.1101/2024.01.07.574567},
  url = {https://www.biorxiv.org/content/10.1101/2024.01.07.574567v1},
  urldate = {2024-10-07},
  abstract = {An accurate binding affinity prediction between T-cell receptors and epitopes contributes decisively to develop successful immunotherapy strategies. Some state-of-the-art computational methods implement deep learning techniques by integrating evolutionary features to convert the amino acid residues of cell receptors and epitope sequences into numerical values, while some other methods employ pre-trained language models to summarize the embedding vectors at the amino acid residue level to obtain sequence-wise representations. Here, we propose a highly reliable novel method, MATE-Pred, that performs multi-modal attention-based prediction of T-cell receptors and epitopes binding affinity. The MATE-Pred is compared and benchmarked with other deep learning models that leverage multi-modal representations of T-cell receptors and epitopes. In the proposed method, the textual representation of proteins is embedded with a pre-trained bi-directional encoder model and combined with two additional modalities: a) a comprehensive set of selected physicochemical properties; b) predicted contact maps that estimate the 3D distances between amino acid residues in the sequences. The MATE-Pred demonstrates the potential of multi-modal model in achieving state-of-the-art performance (+8.4\% MCC, +5.5\% AUC compared to baselines) and efficiently capturing contextual, physicochemical, and structural information from amino acid residues. The performance of MATE-Pred projects its potential application in various drug discovery regimes.},
  langid = {english},
  pubstate = {prepublished},
  file = {/Users/Nasy/Zotero/storage/27SWFMNW/Goffinet et al. - 2024 - MATE-Pred Multimodal Attention-based TCR-Epitope interaction Predictor.pdf}
}

@online{GoogleTranslate,
  title = {Google {{Translate}}},
  url = {https://translate.google.com/?sl=auto&tl=zh-CN&text=%E8%BF%99%E7%AF%87%E6%96%87%E7%AB%A0%E4%BD%BF%E7%94%A8%E7%9A%84%E6%98%AF%20docking%20energy%20%E6%9D%A5%E8%AE%A1%E7%AE%97%20TCR-peptide%20%E7%9B%B4%E6%8E%A5%E7%9A%84%E7%BB%93%E5%90%88%E8%83%BD%E3%80%82%E5%AE%83%E4%B9%9F%E8%AF%B4%E4%BA%86%EF%BC%8C&op=translate},
  urldate = {2022-09-16},
  file = {/Users/Nasy/Zotero/storage/45KWT3LZ/translate.google.com.html}
}

@article{greeneReviewGallotHulin1989,
  title = {Review: {{S}}. {{Gallot}}, {{D}}. {{Hulin}} and {{J}}. {{Lafontaine}}, {{Riemannian}} Geometry},
  shorttitle = {Review},
  author = {Greene, Robert E.},
  date = {1989-07},
  journaltitle = {Bulletin (New Series) of the American Mathematical Society},
  volume = {21},
  number = {1},
  pages = {157--162},
  publisher = {American Mathematical Society},
  issn = {0273-0979, 1088-9485},
  doi = {10.1090/S0273-0979-1989-15802-3},
  url = {https://projecteuclid.org/journals/bulletin-of-the-american-mathematical-society-new-series/volume-21/issue-1/Review--S-Gallot-D-Hulin-and-J-Lafontaine-Riemannian/bams/1183555145.full},
  urldate = {2023-03-10},
  abstract = {Bulletin (New Series) of the American Mathematical Society},
  file = {/Users/Nasy/Zotero/storage/SHBBIS4A/Greene - 1989 - Review S. Gallot, D. Hulin and J. Lafontaine, Rie.pdf}
}

@book{guggenheimerDifferentialGeometry2012,
  title = {Differential {{Geometry}}},
  author = {Guggenheimer, Heinrich W.},
  date = {2012-04-27},
  eprint = {kavDAgAAQBAJ},
  eprinttype = {googlebooks},
  publisher = {Courier Corporation},
  abstract = {This is a text of local differential geometry considered as an application of advanced calculus and linear algebra. The discussion is designed for advanced undergraduate or beginning graduate study, and presumes of readers only a fair knowledge of matrix algebra and of advanced calculus of functions of several real variables. The author, who is a Professor of Mathematics at the Polytechnic Institute of New York, begins with a discussion of plane geometry and then treats the local theory of Lie groups and transformation groups, solid differential geometry, and Riemannian geometry, leading to a general theory of connections. The author presents a full development of the Erlangen Program in the foundations of geometry as used by Elie Cartan as a basis of modern differential geometry; the book can serve as an introduction to the methods of E. Cartan. The theory is applied to give a complete development of affine differential geometry in two and three dimensions. Although the text deals only with local problems (except for global problems that can be treated by methods of advanced calculus), the definitions have been formulated so as to be applicable to modern global differential geometry. The algebraic development of tensors is equally accessible to physicists and to pure mathematicians. The wealth of specific resutls and the replacement of most tensor calculations by linear algebra makes the book attractive to users of mathematics in other disciplines.},
  isbn = {978-0-486-15720-7},
  langid = {english},
  pagetotal = {716},
  keywords = {Mathematics / Geometry / Differential}
}

@article{guoRetrosynthesisZeroSelfImproving2024,
  title = {Retrosynthesis {{Zero}}: {{Self-Improving Global Synthesis Planning Using Reinforcement Learning}}},
  shorttitle = {Retrosynthesis {{Zero}}},
  author = {Guo, Jiasheng and Yu, Chenning and Li, Kenan and Zhang, Yijian and Wang, Guoqiang and Li, Shuhua and Dong, Hao},
  date = {2024-06-11},
  journaltitle = {J. Chem. Theory Comput.},
  volume = {20},
  number = {11},
  pages = {4921--4938},
  publisher = {American Chemical Society},
  issn = {1549-9618},
  doi = {10.1021/acs.jctc.4c00071},
  url = {https://doi.org/10.1021/acs.jctc.4c00071},
  urldate = {2024-07-11},
  abstract = {The field of computer-aided synthesis planning (CASP) has witnessed significant growth in recent years. Still, many CASP programs rely on large data sets to train neural networks, resulting in limitations due to the data quality and prior knowledge from chemists. In response, we propose Retrosynthesis Zero (ReSynZ), a reaction template-based method that combines Monte Carlo Tree Search with reinforcement learning inspired by AlphaGo Zero. Unlike other single-step reaction template-based CASP methods, ReSynZ takes complete synthesis paths for complex molecules, determined by reaction rules, as input for training the neural network. ReSynZ enables neural networks trained with relatively small reaction data sets (tens of thousands of data) to generate multiple synthesis pathways for a target molecule and suggest possible reaction conditions. On multiple data sets of molecular retrosynthesis, ReSynZ demonstrates excellent predictive performance compared to existing algorithms. The advantages, such as self-improving model features, flexible reward settings, the potential to surpass human limitations in chemical synthesis route planning, and others, make ReSynZ a valuable tool in chemical synthesis design.}
}

@article{guoRetrosynthesisZeroSelfImproving2024a,
  title = {Retrosynthesis {{Zero}}: {{Self-Improving Global Synthesis Planning Using Reinforcement Learning}}},
  shorttitle = {Retrosynthesis {{Zero}}},
  author = {Guo, Jiasheng and Yu, Chenning and Li, Kenan and Zhang, Yijian and Wang, Guoqiang and Li, Shuhua and Dong, Hao},
  date = {2024-06-11},
  journaltitle = {J. Chem. Theory Comput.},
  volume = {20},
  number = {11},
  pages = {4921--4938},
  issn = {1549-9618},
  doi = {10.1021/acs.jctc.4c00071},
  url = {https://doi.org/10.1021/acs.jctc.4c00071},
  urldate = {2024-07-11},
  abstract = {The field of computer-aided synthesis planning (CASP) has witnessed significant growth in recent years. Still, many CASP programs rely on large data sets to train neural networks, resulting in limitations due to the data quality and prior knowledge from chemists. In response, we propose Retrosynthesis Zero (ReSynZ), a reaction template-based method that combines Monte Carlo Tree Search with reinforcement learning inspired by AlphaGo Zero. Unlike other single-step reaction template-based CASP methods, ReSynZ takes complete synthesis paths for complex molecules, determined by reaction rules, as input for training the neural network. ReSynZ enables neural networks trained with relatively small reaction data sets (tens of thousands of data) to generate multiple synthesis pathways for a target molecule and suggest possible reaction conditions. On multiple data sets of molecular retrosynthesis, ReSynZ demonstrates excellent predictive performance compared to existing algorithms. The advantages, such as self-improving model features, flexible reward settings, the potential to surpass human limitations in chemical synthesis route planning, and others, make ReSynZ a valuable tool in chemical synthesis design.}
}

@article{guoRetrosynthesisZeroSelfImproving2024b,
  title = {Retrosynthesis {{Zero}}: {{Self-Improving Global Synthesis Planning Using Reinforcement Learning}}},
  shorttitle = {Retrosynthesis {{Zero}}},
  author = {Guo, Jiasheng and Yu, Chenning and Li, Kenan and Zhang, Yijian and Wang, Guoqiang and Li, Shuhua and Dong, Hao},
  date = {2024-06-11},
  journaltitle = {J. Chem. Theory Comput.},
  volume = {20},
  number = {11},
  pages = {4921--4938},
  publisher = {American Chemical Society},
  issn = {1549-9618},
  doi = {10.1021/acs.jctc.4c00071},
  url = {https://doi.org/10.1021/acs.jctc.4c00071},
  urldate = {2024-07-11},
  abstract = {The field of computer-aided synthesis planning (CASP) has witnessed significant growth in recent years. Still, many CASP programs rely on large data sets to train neural networks, resulting in limitations due to the data quality and prior knowledge from chemists. In response, we propose Retrosynthesis Zero (ReSynZ), a reaction template-based method that combines Monte Carlo Tree Search with reinforcement learning inspired by AlphaGo Zero. Unlike other single-step reaction template-based CASP methods, ReSynZ takes complete synthesis paths for complex molecules, determined by reaction rules, as input for training the neural network. ReSynZ enables neural networks trained with relatively small reaction data sets (tens of thousands of data) to generate multiple synthesis pathways for a target molecule and suggest possible reaction conditions. On multiple data sets of molecular retrosynthesis, ReSynZ demonstrates excellent predictive performance compared to existing algorithms. The advantages, such as self-improving model features, flexible reward settings, the potential to surpass human limitations in chemical synthesis route planning, and others, make ReSynZ a valuable tool in chemical synthesis design.}
}

@unpublished{haDeepRankingAdaptive2021,
  title = {Deep {{Ranking}} with {{Adaptive Margin Triplet Loss}}},
  author = {Ha, Mai Lan and Blanz, Volker},
  date = {2021-07-13},
  eprint = {2107.06187},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/2107.06187},
  urldate = {2022-05-12},
  abstract = {We propose a simple modification from a fixed margin triplet loss to an adaptive margin triplet loss. While the original triplet loss is used widely in classification problems such as face recognition, face re-identification and fine-grained similarity, our proposed loss is well suited for rating datasets in which the ratings are continuous values. In contrast to original triplet loss where we have to sample data carefully, in out method, we can generate triplets using the whole dataset, and the optimization can still converge without frequently running into a model collapsing issue. The adaptive margins only need to be computed once before the training, which is much less expensive than generating triplets after every epoch as in the fixed margin case. Besides substantially improved training stability (the proposed model never collapsed in our experiments compared to a couple of times that the training collapsed on existing triplet loss), we achieved slightly better performance than the original triplet loss on various rating datasets and network architectures.},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/B364BGDZ/Ha and Blanz - 2021 - Deep Ranking with Adaptive Margin Triplet Loss.pdf;/Users/Nasy/Zotero/storage/46Q4P2H6/2107.html}
}

@inproceedings{hadsellDimensionalityReductionLearning2006,
  title = {Dimensionality {{Reduction}} by {{Learning}} an {{Invariant Mapping}}},
  booktitle = {2006 {{IEEE Computer Society Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}}'06)},
  author = {Hadsell, R. and Chopra, S. and LeCun, Y.},
  date = {2006-06},
  volume = {2},
  pages = {1735--1742},
  issn = {1063-6919},
  doi = {10.1109/CVPR.2006.100},
  abstract = {Dimensionality reduction involves mapping a set of high dimensional input points onto a low dimensional manifold so that 'similar" points in input space are mapped to nearby points on the manifold. We present a method - called Dimensionality Reduction by Learning an Invariant Mapping (DrLIM) - for learning a globally coherent nonlinear function that maps the data evenly to the output manifold. The learning relies solely on neighborhood relationships and does not require any distancemeasure in the input space. The method can learn mappings that are invariant to certain transformations of the inputs, as is demonstrated with a number of experiments. Comparisons are made to other techniques, in particular LLE.},
  eventtitle = {2006 {{IEEE Computer Society Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}}'06)},
  keywords = {Astronomy,Biology,Data visualization,Extraterrestrial measurements,Feature extraction,Geoscience,Image analysis,Image generation,Manufacturing industries,Service robots},
  annotation = {1115 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/KJFJ4M7B/1640964.html}
}

@unpublished{hamiltonInductiveRepresentationLearning2018,
  title = {Inductive {{Representation Learning}} on {{Large Graphs}}},
  author = {Hamilton, William L. and Ying, Rex and Leskovec, Jure},
  date = {2018-09-10},
  eprint = {1706.02216},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/1706.02216},
  urldate = {2022-03-11},
  abstract = {Low-dimensional embeddings of nodes in large graphs have proved extremely useful in a variety of prediction tasks, from content recommendation to identifying protein functions. However, most existing approaches require that all nodes in the graph are present during training of the embeddings; these previous approaches are inherently transductive and do not naturally generalize to unseen nodes. Here we present GraphSAGE, a general, inductive framework that leverages node feature information (e.g., text attributes) to efficiently generate node embeddings for previously unseen data. Instead of training individual embeddings for each node, we learn a function that generates embeddings by sampling and aggregating features from a node's local neighborhood. Our algorithm outperforms strong baselines on three inductive node-classification benchmarks: we classify the category of unseen nodes in evolving information graphs based on citation and Reddit post data, and we show that our algorithm generalizes to completely unseen graphs using a multi-graph dataset of protein-protein interactions.},
  keywords = {Computer Science - Machine Learning,Computer Science - Social and Information Networks,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/Z62ECJTM/Hamilton et al. - 2018 - Inductive Representation Learning on Large Graphs.pdf;/Users/Nasy/Zotero/storage/MEGZX3PH/1706.html}
}

@online{hanEquivariantGraphHierarchyBased2022,
  title = {Equivariant {{Graph Hierarchy-Based Neural Networks}}},
  author = {Han, Jiaqi and Huang, Wenbing and Xu, Tingyang and Rong, Yu},
  date = {2022-10-15},
  eprint = {2202.10643},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2202.10643},
  url = {http://arxiv.org/abs/2202.10643},
  urldate = {2022-10-20},
  abstract = {Equivariant Graph neural Networks (EGNs) are powerful in characterizing the dynamics of multi-body physical systems. Existing EGNs conduct flat message passing, which, yet, is unable to capture the spatial/dynamical hierarchy for complex systems particularly, limiting substructure discovery and global information fusion. In this paper, we propose Equivariant Hierarchy-based Graph Networks (EGHNs) which consist of the three key components: generalized Equivariant Matrix Message Passing (EMMP) , E-Pool and E-UpPool. In particular, EMMP is able to improve the expressivity of conventional equivariant message passing, E-Pool assigns the quantities of the low-level nodes into high-level clusters, while E-UpPool leverages the high-level information to update the dynamics of the low-level nodes. As their names imply, both E-Pool and E-UpPool are guaranteed to be equivariant to meet physic symmetry. Considerable experimental evaluations verify the effectiveness of our EGHN on several applications including multi-object dynamics simulation, motion capture, and protein dynamics modeling.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/AE6P2PUI/Han et al. - 2022 - Equivariant Graph Hierarchy-Based Neural Networks.pdf;/Users/Nasy/Zotero/storage/JJFKRASF/2202.html}
}

@online{hanLearningPhysicalDynamics2022,
  title = {Learning {{Physical Dynamics}} with {{Subequivariant Graph Neural Networks}}},
  author = {Han, Jiaqi and Huang, Wenbing and Ma, Hengbo and Li, Jiachen and Tenenbaum, Joshua B. and Gan, Chuang},
  date = {2022-10-13},
  eprint = {2210.06876},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2210.06876},
  url = {http://arxiv.org/abs/2210.06876},
  urldate = {2022-10-20},
  abstract = {Graph Neural Networks (GNNs) have become a prevailing tool for learning physical dynamics. However, they still encounter several challenges: 1) Physical laws abide by symmetry, which is a vital inductive bias accounting for model generalization and should be incorporated into the model design. Existing simulators either consider insufficient symmetry, or enforce excessive equivariance in practice when symmetry is partially broken by gravity. 2) Objects in the physical world possess diverse shapes, sizes, and properties, which should be appropriately processed by the model. To tackle these difficulties, we propose a novel backbone, Subequivariant Graph Neural Network, which 1) relaxes equivariance to subequivariance by considering external fields like gravity, where the universal approximation ability holds theoretically; 2) introduces a new subequivariant object-aware message passing for learning physical interactions between multiple objects of various shapes in the particle-based representation; 3) operates in a hierarchical fashion, allowing for modeling long-range and complex interactions. Our model achieves on average over 3\% enhancement in contact prediction accuracy across 8 scenarios on Physion and 2X lower rollout MSE on RigidFall compared with state-of-the-art GNN simulators, while exhibiting strong generalization and data efficiency.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/TR6I8UXL/Han et al. - 2022 - Learning Physical Dynamics with Subequivariant Gra.pdf;/Users/Nasy/Zotero/storage/KV3NJZU3/2210.html}
}

@inproceedings{hanMultimodalDynamicsDynamical2022,
  title = {Multimodal {{Dynamics}}: {{Dynamical Fusion}} for {{Trustworthy Multimodal Classification}}},
  shorttitle = {Multimodal {{Dynamics}}},
  booktitle = {2022 {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
  author = {Han, Zongbo and Yang, Fan and Huang, Junzhou and Zhang, Changqing and Yao, Jianhua},
  date = {2022-06},
  pages = {20675--20685},
  publisher = {IEEE},
  location = {New Orleans, LA, USA},
  doi = {10.1109/CVPR52688.2022.02005},
  url = {https://ieeexplore.ieee.org/document/9878603/},
  abstract = {Integration of heterogeneous and high-dimensional data (e.g., multiomics) is becoming increasingly important. Existing multimodal classification algorithms mainly focus on improving performance by exploiting the complementarity from different modalities. However, conventional approaches are basically weak in providing trustworthy multimodal fusion, especially for safety-critical applications (e.g., medical diagnosis). For this issue, we propose a novel trustworthy multimodal classification algorithm termed Multimodal Dynamics, which dynamically evaluates both the feature-level and modality-level informativeness for different samples and thus trustworthily integrates multiple modalities. Specifically, a sparse gating is introduced to capture the information variation of each within-modality feature and the true class probability is employed to assess the classification confidence of each modality. Then a transparent fusion algorithm based on the dynamical informativeness estimation strategy is induced. To the best of our knowledge, this is the first work to jointly model both feature and modality variation for different samples to provide trustworthy fusion in multi-modal classification. Extensive experiments are conducted on multimodal medical classification datasets. In these experiments, superior performance and trustworthiness of our algorithm are clearly validated compared to the state-of-the-art methods.},
  eventtitle = {2022 {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
  isbn = {978-1-66546-946-3},
  langid = {english},
  annotation = {1 citations (Crossref) [2023-05-26]},
  file = {/Users/Nasy/Zotero/storage/VQ6XLI7H/Han et al. - 2022 - Multimodal Dynamics Dynamical Fusion for Trustwor.pdf}
}

@online{hanPredictingSARSCoV2Epitopespecific2021,
  title = {Predicting {{SARS-CoV-2}} Epitope-Specific {{TCR}} Recognition Using Pre-Trained Protein Embeddings},
  author = {Han, Youngmahn and Lee, Aeri},
  date = {2021-11-17},
  eprinttype = {bioRxiv},
  eprintclass = {New Results},
  pages = {2021.11.17.468929},
  doi = {10.1101/2021.11.17.468929},
  url = {https://www.biorxiv.org/content/10.1101/2021.11.17.468929v1},
  urldate = {2022-08-19},
  abstract = {The COVID-19 pandemic is ongoing because of the high transmission rate and the emergence of SARS-CoV-2 variants. The P272L mutation in SARS-Cov-2 S-protein is known to be highly relevant to the viral escape associated with the second pandemic wave in Europe. Epitope-specific T-cell receptor (TCR) recognition is a key factor in determining the T-cell immunogenicity of a SARS-CoV-2 epitope. Although several data-driven methods for predicting epitope-specific TCR recognition have been proposed, they remain challenging owing to the enormous diversity of TCRs and the lack of available training data. Self-supervised transfer learning has recently been demonstrated to be powerful for extracting useful information from unlabeled protein sequences and increasing the predictive performance of the fine-tuned models in downstream tasks. Here, we present a predictive model based on Bidirectional Encoder Representations from Transformers (BERT), employing self-supervised transfer learning, to predict SARS-CoV-2 T-cell epitope-specific TCR recognition. The fine-tuned model showed notably high predictive performance for independent evaluation using the SARS-CoV-2 epitope-specific TCR CDR3β sequence datasets. In particular, we found the proline at position 4 corresponding to the P272L mutation in the SARS-CoV-2 S-protein269-277 epitope (YLQPRTFLL) may contribute substantially to TCR recognition of the epitope through interpreting the output attention weights of our model. We anticipate that our findings will provide new directions for constructing a reliable data-driven model to predict the immunogenic T-cell epitopes using limited training data and help accelerate the development of an effective vaccine in response to SARS-CoV-2 variants.},
  langid = {english},
  pubstate = {prepublished},
  annotation = {0 citations (Crossref) [2022-08-19]},
  file = {/Users/Nasy/Zotero/storage/DETW4F4Z/Han and Lee - 2021 - Predicting SARS-CoV-2 epitope-specific TCR recogni.pdf;/Users/Nasy/Zotero/storage/7YPYRKD8/2021.11.17.468929v1.html}
}

@inproceedings{haubergGeometricTakeMetric2012,
  title = {A {{Geometric}} Take on {{Metric Learning}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {ren Hauberg, Sø and Freifeld, Oren and Black, Michael},
  date = {2012},
  volume = {25},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper_files/paper/2012/hash/ec5aa0b7846082a2415f0902f0da88f2-Abstract.html},
  urldate = {2023-08-14},
  abstract = {Multi-metric learning techniques learn local metric tensors in different parts of a feature space. With such an approach, even simple classifiers can be competitive with the state-of-the-art because the distance measure locally adapts to the structure of the data. The learned distance measure is, however, non-metric, which has prevented multi-metric learning from generalizing to tasks such as dimensionality reduction and regression in a principled way. We prove that, with appropriate changes, multi-metric learning corresponds to learning the structure of a Riemannian manifold. We then show that this structure gives us a principled way to perform dimensionality reduction and regression according to the learned metrics. Algorithmically, we provide the first practical algorithm for computing geodesics according to the learned metrics, as well as algorithms for computing exponential and logarithmic maps on the Riemannian manifold. Together, these tools let many Euclidean algorithms take advantage of multi-metric learning. We illustrate the approach on regression and dimensionality reduction tasks that involve predicting measurements of the human body from shape data.},
  file = {/Users/Nasy/Zotero/storage/3WDZE77B/Hauberg et al. - 2012 - A Geometric take on Metric Learning.pdf}
}

@article{hausenMorpheusDeepLearning2020,
  title = {Morpheus: {{A Deep Learning Framework}} for the {{Pixel-level Analysis}} of {{Astronomical Image Data}}},
  shorttitle = {Morpheus},
  author = {Hausen, Ryan and Robertson, Brant E.},
  date = {2020-05},
  journaltitle = {ApJS},
  volume = {248},
  number = {1},
  pages = {20},
  publisher = {American Astronomical Society},
  issn = {0067-0049},
  doi = {10.3847/1538-4365/ab8868},
  url = {https://doi.org/10.3847/1538-4365/ab8868},
  urldate = {2022-07-29},
  abstract = {We present Morpheus, a new model for generating pixel-level morphological classifications of astronomical sources. Morpheus leverages advances in deep learning to perform source detection, source segmentation, and morphological classification pixel-by-pixel via a semantic segmentation algorithm adopted from the field of computer vision. By utilizing morphological information about the flux of real astronomical sources during object detection, Morpheus shows resiliency to false-positive identifications of sources. We evaluate Morpheus by performing source detection, source segmentation, morphological classification on the Hubble Space Telescope data in the five CANDELS fields with a focus on the GOODS South field, and demonstrate a high completeness in recovering known GOODS South 3D-HST sources with H {$<$} 26 AB. We release the code publicly, provide online demonstrations, and present an interactive visualization of the Morpheus results in GOODS South.},
  langid = {english},
  annotation = {29 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/RJMMBVEA/Hausen and Robertson - 2020 - $less$i$greater$Morpheus$less$i$greater$ A D.pdf}
}

@unpublished{heDeepResidualLearning2015,
  title = {Deep {{Residual Learning}} for {{Image Recognition}}},
  author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  date = {2015-12-10},
  eprint = {1512.03385},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/1512.03385},
  urldate = {2022-03-11},
  abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57\% error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28\% relative improvement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC \& COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.},
  keywords = {Computer Science - Computer Vision and Pattern Recognition},
  file = {/Users/Nasy/Zotero/storage/TSYP4GVT/He et al. - 2015 - Deep Residual Learning for Image Recognition.pdf;/Users/Nasy/Zotero/storage/E5FGX5W4/1512.html}
}

@unpublished{hermansDefenseTripletLoss2017,
  title = {In {{Defense}} of the {{Triplet Loss}} for {{Person Re-Identification}}},
  author = {Hermans, Alexander and Beyer, Lucas and Leibe, Bastian},
  date = {2017-11-21},
  eprint = {1703.07737},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/1703.07737},
  abstract = {In the past few years, the field of computer vision has gone through a revolution fueled mainly by the advent of large datasets and the adoption of deep convolutional neural networks for end-to-end learning. The person re-identification subfield is no exception to this. Unfortunately, a prevailing belief in the community seems to be that the triplet loss is inferior to using surrogate losses (classification, verification) followed by a separate metric learning step. We show that, for models trained from scratch as well as pretrained ones, using a variant of the triplet loss to perform end-to-end deep metric learning outperforms most other published methods by a large margin.},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Neural and Evolutionary Computing},
  file = {/Users/Nasy/Zotero/storage/RTBRZ8H8/Hermans et al. - 2017 - In Defense of the Triplet Loss for Person Re-Ident.pdf;/Users/Nasy/Zotero/storage/DF7QQ686/1703.html}
}

@inproceedings{hoMoreEffectiveDistributed2013,
  title = {More {{Effective Distributed ML}} via a {{Stale Synchronous Parallel Parameter Server}}},
  booktitle = {Proceedings of the 26th {{International Conference}} on {{Neural Information Processing Systems}} - {{Volume}} 1},
  author = {Ho, Qirong and Cipar, James and Cui, Henggang and Kim, Jin Kyu and Lee, Seunghak and Gibbons, Phillip B. and Gibson, Garth A. and Ganger, Gregory R. and Xing, Eric P.},
  date = {2013},
  series = {{{NIPS}}'13},
  pages = {1223--1231},
  publisher = {Curran Associates Inc.},
  location = {Red Hook, NY, USA},
  abstract = {We propose a parameter server system for distributed ML, which follows a Stale Synchronous Parallel (SSP) model of computation that maximizes the time computational workers spend doing useful work on ML algorithms, while still providing correctness guarantees. The parameter server provides an easy-to-use shared interface for read/write access to an ML model's values (parameters and variables), and the SSP model allows distributed workers to read older, stale versions of these values from a local cache, instead of waiting to get them from a central storage. This significantly increases the proportion of time workers spend computing, as opposed to waiting. Furthermore, the SSP model ensures ML algorithm correctness by limiting the maximum age of the stale values. We provide a proof of correctness under SSP, as well as empirical results demonstrating that the SSP model achieves faster algorithm convergence on several different ML problems, compared to fully-synchronous and asynchronous schemes.},
  venue = {Lake Tahoe, Nevada},
  keywords = {nosource}
}

@online{hoogeboomHexaConv2018,
  title = {{{HexaConv}}},
  author = {Hoogeboom, Emiel and Peters, Jorn W. T. and Cohen, Taco S. and Welling, Max},
  date = {2018-03-06},
  eprint = {1803.02108},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1803.02108},
  url = {http://arxiv.org/abs/1803.02108},
  abstract = {The effectiveness of Convolutional Neural Networks stems in large part from their ability to exploit the translation invariance that is inherent in many learning problems. Recently, it was shown that CNNs can exploit other invariances, such as rotation invariance, by using group convolutions instead of planar convolutions. However, for reasons of performance and ease of implementation, it has been necessary to limit the group convolution to transformations that can be applied to the filters without interpolation. Thus, for images with square pixels, only integer translations, rotations by multiples of 90 degrees, and reflections are admissible. Whereas the square tiling provides a 4-fold rotational symmetry, a hexagonal tiling of the plane has a 6-fold rotational symmetry. In this paper we show how one can efficiently implement planar convolution and group convolution over hexagonal lattices, by re-using existing highly optimized convolution routines. We find that, due to the reduced anisotropy of hexagonal filters, planar HexaConv provides better accuracy than planar convolution with square filters, given a fixed parameter budget. Furthermore, we find that the increased degree of symmetry of the hexagonal grid increases the effectiveness of group convolutions, by allowing for more parameter sharing. We show that our method significantly outperforms conventional CNNs on the AID aerial scene classification dataset, even outperforming ImageNet pre-trained models.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/V6QTQTAK/Hoogeboom et al. - 2018 - HexaConv.pdf;/Users/Nasy/Zotero/storage/X3BT5UND/1803.html}
}

@inproceedings{horieIsometricTransformationInvariant2022,
  title = {Isometric {{Transformation Invariant}} and {{Equivariant Graph Convolutional Networks}}},
  author = {Horie, Masanobu and Morita, Naoki and Hishinuma, Toshiaki and Ihara, Yu and Mitsume, Naoto},
  date = {2022-02-10},
  url = {https://openreview.net/forum?id=FX0vR39SJ5q},
  abstract = {Graphs are one of the most important data structures for representing pairwise relations between objects. Specifically, a graph embedded in a Euclidean space is essential to solving real problems, such as physical simulations. A crucial requirement for applying graphs in Euclidean spaces to physical simulations is learning and inferring the isometric transformation invariant and equivariant features in a computationally efficient manner. In this paper, we propose a set of transformation invariant and equivariant models based on graph convolutional networks, called IsoGCNs. We demonstrate that the proposed model has a competitive performance compared to state-of-the-art methods on tasks related to geometrical and physical simulation data. Moreover, the proposed model can scale up to graphs with 1M vertices and conduct an inference faster than a conventional finite element analysis, which the existing equivariant models cannot achieve.},
  eventtitle = {International {{Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/DLKIHQBL/Horie et al. - 2022 - Isometric Transformation Invariant and Equivariant.pdf;/Users/Nasy/Zotero/storage/WT7Y4EUX/forum.html}
}

@inproceedings{horieIsometricTransformationInvariant2022a,
  title = {Isometric {{Transformation Invariant}} and {{Equivariant Graph Convolutional Networks}}},
  author = {Horie, Masanobu and Morita, Naoki and Hishinuma, Toshiaki and Ihara, Yu and Mitsume, Naoto},
  date = {2022-02-10},
  url = {https://openreview.net/forum?id=FX0vR39SJ5q},
  urldate = {2022-11-10},
  abstract = {Graphs are one of the most important data structures for representing pairwise relations between objects. Specifically, a graph embedded in a Euclidean space is essential to solving real problems, such as physical simulations. A crucial requirement for applying graphs in Euclidean spaces to physical simulations is learning and inferring the isometric transformation invariant and equivariant features in a computationally efficient manner. In this paper, we propose a set of transformation invariant and equivariant models based on graph convolutional networks, called IsoGCNs. We demonstrate that the proposed model has a competitive performance compared to state-of-the-art methods on tasks related to geometrical and physical simulation data. Moreover, the proposed model can scale up to graphs with 1M vertices and conduct an inference faster than a conventional finite element analysis, which the existing equivariant models cannot achieve.},
  eventtitle = {International {{Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/MPXSE87Y/Horie et al. - 2022 - Isometric Transformation Invariant and Equivariant.pdf;/Users/Nasy/Zotero/storage/SGUPLVYH/forum.html}
}

@online{horiePhysicsEmbeddedNeuralNetworks2022,
  title = {Physics-{{Embedded Neural Networks}}: \$\textbackslash boldsymbol\{\textbackslash mathrm\{\vphantom{\}\}}{{E}}\vphantom\{\}(n)\vphantom\{\}\$-{{Equivariant Graph Neural PDE Solvers}}},
  shorttitle = {Physics-{{Embedded Neural Networks}}},
  author = {Horie, Masanobu and Mitsume, Naoto},
  date = {2022-05-24},
  eprint = {2205.11912},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.2205.11912},
  url = {http://arxiv.org/abs/2205.11912},
  abstract = {Graph neural network (GNN) is a promising approach to learning and predicting physical phenomena described in boundary value problems, such as partial differential equations (PDEs) with boundary conditions. However, existing models inadequately treat boundary conditions essential for the reliable prediction of such problems. In addition, because of the locally connected nature of GNNs, it is difficult to accurately predict the state after a long time, where interaction between vertices tends to be global. We present our approach termed physics-embedded neural networks that considers boundary conditions and predicts the state after a long time using an implicit method. It is built based on an \$\textbackslash mathrm\{E\}(n)\$-equivariant GNN, resulting in high generalization performance on various shapes. We demonstrate that our model learns flow phenomena in complex shapes and outperforms a well-optimized classical solver and a state-of-the-art machine learning model in speed-accuracy trade-off. Therefore, our model can be a useful standard for realizing reliable, fast, and accurate GNN-based PDE solvers.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computational Engineering Finance and Science,Computer Science - Machine Learning},
  annotation = {0 citations (Semantic Scholar/arXiv) [2022-11-03]\\
1 citations (Semantic Scholar/DOI) [2022-11-03]},
  file = {/Users/Nasy/Zotero/storage/336PLSXD/Horie and Mitsume - 2022 - Physics-Embedded Neural Networks $boldsymbol ma.pdf;/Users/Nasy/Zotero/storage/JVUYG2Z7/2205.html}
}

@article{houAnalysisRepertoireFeatures2016,
  title = {Analysis of the {{Repertoire Features}} of {{TCR Beta Chain CDR3}} in {{Human}} by {{High-Throughput Sequencing}}},
  author = {Hou, Xianliang and Wang, Mingbang and Lu, Chong and Xie, Qian and Cui, Guangying and Chen, Jianing and Du, Yu and Dai, Yong and Diao, Hongyan},
  date = {2016},
  journaltitle = {CPB},
  volume = {39},
  number = {2},
  eprint = {27442436},
  eprinttype = {pmid},
  pages = {651--667},
  publisher = {Karger Publishers},
  issn = {1015-8987, 1421-9778},
  doi = {10.1159/000445656},
  url = {https://www.karger.com/Article/FullText/445656},
  urldate = {2022-05-12},
  abstract = {\textbf{\emph{Background/Aims: }}To ward off a wide variety of pathogens, the human adaptive immune system harbors a vast array of T-cell receptors, collectively referred to as the TCR repertoire. Assessment of the repertoire features of TCR is vital for us to deeper understand of immune behaviour and immune response. \textbf{\emph{Methods: }}In this study, we used a combination of multiplex-PCR, Illumina sequencing and IMGT (ImMunoGeneTics)/HighV-QUEST for a standardized analysis of the repertoire features of TCR beta chain in the blood of healthy individuals, including the repertoire features of public TCR complementarity-determining regions (CDR3) sequences, highly expanded clones, long TCR CDR3 sequences. \textbf{\emph{Results: }}We found that public CDR3 sequences and high-frequency sequences had the same characteristics, both of them had fewer nucleotide additions and shorter CDR3 length, which were closer to the germline sequence. Moreover, our studies provided evidence that public amino acid sequences are produced by multiple nucleotide sequences. Notably, there was skewed VDJ segment usage in long CDR3 sequences, the expression levels of 10 TRβV segments, 7 TRβJ segments and 2 TRβD segments were significantly different in the long CDR3 sequences compared to the short CDR3 sequences. Moreover, we identified that extensive N additions and increase of D gene usage contributing to TCR CDR3 length, and observed there was distinct usage frequency of amino acids in long CDR3 sequences compared to the short CDR3 sequences. \textbf{\emph{Conclusions: }}Some repertoire features could be observed in the public sequences, highly abundance clones, and long TCR CDR3 sequences, which might be helpful for further study of immune behavior and immune response.},
  langid = {english},
  annotation = {21 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/QTBIS9WH/Hou et al. - 2016 - Analysis of the Repertoire Features of TCR Beta Ch.pdf}
}

@online{HowImportBibTex,
  title = {How to Import {{BibTex}} to {{Zotero}}},
  url = {https://forums.zotero.org/discussion/66009/how-to-import-bibtex-to-zotero},
  urldate = {2022-03-17},
  abstract = {Zotero is a powerful, easy-to-use research tool that helps you gather, organize, and analyze sources and then share the results of your research.},
  langid = {english},
  organization = {Zotero Forums},
  keywords = {nosource}
}

@inproceedings{huangEquivariantGraphMechanics2022,
  title = {Equivariant {{Graph Mechanics Networks}} with {{Constraints}}},
  author = {Huang, Wenbing and Han, Jiaqi and Rong, Yu and Xu, Tingyang and Sun, Fuchun and Huang, Junzhou},
  date = {2022-03-24},
  url = {https://openreview.net/forum?id=SHbhHHfePhP},
  urldate = {2022-10-20},
  abstract = {Learning to reason about relations and dynamics over multiple interacting objects is a challenging topic in machine learning. The challenges mainly stem from that the interacting systems are exponentially-compositional, symmetrical, and commonly geometrically-constrained. Current methods, particularly the ones based on equivariant Graph Neural Networks (GNNs), have targeted on the first two challenges but remain immature for constrained systems. In this paper, we propose Graph Mechanics Network (GMN) which is combinatorially efficient, equivariant and constraint-aware. The core of GMN is that it represents, by generalized coordinates, the forward kinematics information (positions and velocities) of a structural object. In this manner, the geometrical constraints are implicitly and naturally encoded in the forward kinematics. Moreover, to allow equivariant message passing in GMN, we have developed a general form of orthogonality-equivariant functions, given that the dynamics of constrained systems are more complicated than the unconstrained counterparts. Theoretically, the proposed equivariant formulation is proved to be universally expressive under certain conditions. Extensive experiments support the advantages of GMN compared to the state-of-the-art GNNs in terms of prediction accuracy, constraint satisfaction and data efficiency on the simulated systems consisting of particles, sticks and hinges, as well as two real-world datasets for molecular dynamics prediction and human motion capture.},
  eventtitle = {International {{Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/IDBUVTQA/Huang et al. - 2022 - Equivariant Graph Mechanics Networks with Constrai.pdf;/Users/Nasy/Zotero/storage/B28RQI5J/forum.html}
}

@inproceedings{huangHGAMNHeterogeneousGraph2021,
  title = {{{HGAMN}}: {{Heterogeneous Graph Attention Matching Network}} for {{Multilingual POI Retrieval}} at {{Baidu Maps}}},
  shorttitle = {{{HGAMN}}},
  booktitle = {Proceedings of the 27th {{ACM SIGKDD Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Huang, Jizhou and Wang, Haifeng and Sun, Yibo and Fan, Miao and Huang, Zhengjie and Yuan, Chunyuan and Li, Yawen},
  date = {2021-08-14},
  series = {{{KDD}} '21},
  pages = {3032--3040},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3447548.3467059},
  url = {https://doi.org/10.1145/3447548.3467059},
  urldate = {2022-05-27},
  abstract = {The increasing interest in international travel has raised the demand of retrieving point of interests (POIs) in multiple languages. This is even superior to find local venues such as restaurants and scenic spots in unfamiliar languages when traveling abroad. Multilingual POI retrieval, enabling users to find desired POIs in a demanded language using queries in numerous languages, has become an indispensable feature of today's global map applications such as Baidu Maps. This task is non-trivial because of two key challenges: (1) visiting sparsity and (2) multilingual query-POI matching. To this end, we propose a Heterogeneous Graph Attention Matching Network (HGAMN) to concurrently address both challenges. Specifically, we construct a heterogeneous graph that contains two types of nodes: POI node and query node using the search logs of Baidu Maps. First, to alleviate challenge \#1, we construct edges between different POI nodes to link the low-frequency POIs with the high-frequency ones, which enables the transfer of knowledge from the latter to the former. Second, to mitigate challenge \#2, we construct edges between POI and query nodes based on the co-occurrences between queries and POIs, where queries in different languages and formulations can be aggregated for individual POIs. Moreover, we develop an attention-based network to jointly learn node representations of the heterogeneous graph and further design a cross-attention module to fuse the representations of both types of nodes for query-POI relevance scoring. In this way, the relevance ranking between multilingual queries and POIs with different popularity can be better handled. Extensive experiments conducted on large-scale real-world datasets from Baidu Maps demonstrate the superiority and effectiveness of HGAMN. In addition, HGAMN has already been deployed in production at Baidu Maps, and it successfully keeps serving hundreds of millions of requests every day. Compared with the previously deployed model, HGAMN achieves significant performance improvement, which confirms that HGAMN is a practical and robust solution for large-scale real-world multilingual POI retrieval service.},
  isbn = {978-1-4503-8332-5},
  keywords = {Baidu maps,graph neural network,heterogeneous graph,multilingual POI retrieval,nosource,POI search},
  annotation = {1 citations (Crossref) [2022-08-03]}
}

@article{huangTimeefficientLinearspaceLocal1991,
  title = {A Time-Efficient, Linear-Space Local Similarity Algorithm},
  author = {Huang, Xiaoqiu and Miller, Webb},
  date = {1991-09-01},
  journaltitle = {Adv. Appl. Math.},
  volume = {12},
  number = {3},
  pages = {337--357},
  issn = {0196-8858},
  doi = {10.1016/0196-8858(91)90017-D},
  url = {https://doi.org/10.1016/0196-8858(91)90017-D},
  urldate = {2022-08-19},
  abstract = {Dynamic programming algorithms to determine similar regions of two sequences are useful for analyzing biosequence data. This paper presents a time-efficient algorithm that produces k best ''non-intersecting'' local alignments for any chosen k. The algorithm's main strength is that it needs only O(M + N + K) space, where M and N are the lengths of the given sequences and K is the total length of the computed alignments.},
  annotation = {745 citations (Crossref) [2022-08-19]},
  file = {/Users/Nasy/Zotero/storage/TN2RA72K/Huang and Miller - 1991 - A time-efficient, linear-space local similarity al.pdf}
}

@article{huertaEnablingRealtimeMultimessenger2019,
  title = {Enabling Real-Time Multi-Messenger Astrophysics Discoveries with Deep Learning},
  author = {Huerta, E. A. and Allen, Gabrielle and Andreoni, Igor and Antelis, Javier M. and Bachelet, Etienne and Berriman, G. Bruce and Bianco, Federica B. and Biswas, Rahul and Carrasco~Kind, Matias and Chard, Kyle and Cho, Minsik and Cowperthwaite, Philip S. and Etienne, Zachariah B. and Fishbach, Maya and Forster, Francisco and George, Daniel and Gibbs, Tom and Graham, Matthew and Gropp, William and Gruendl, Robert and Gupta, Anushri and Haas, Roland and Habib, Sarah and Jennings, Elise and Johnson, Margaret W. G. and Katsavounidis, Erik and Katz, Daniel S. and Khan, Asad and Kindratenko, Volodymyr and Kramer, William T. C. and Liu, Xin and Mahabal, Ashish and Marka, Zsuzsa and McHenry, Kenton and Miller, J. M. and Moreno, Claudia and Neubauer, M. S. and Oberlin, Steve and Olivas, Alexander R. and Petravick, Donald and Rebei, Adam and Rosofsky, Shawn and Ruiz, Milton and Saxton, Aaron and Schutz, Bernard F. and Schwing, Alex and Seidel, Ed and Shapiro, Stuart L. and Shen, Hongyu and Shen, Yue and Singer, Leo P. and Sipocz, Brigitta M. and Sun, Lunan and Towns, John and Tsokaros, Antonios and Wei, Wei and Wells, Jack and Williams, Timothy J. and Xiong, Jinjun and Zhao, Zhizhen},
  date = {2019-10},
  journaltitle = {Nat Rev Phys},
  volume = {1},
  number = {10},
  pages = {600--608},
  issn = {2522-5820},
  doi = {10.1038/s42254-019-0097-4},
  url = {http://www.nature.com/articles/s42254-019-0097-4},
  urldate = {2021-04-29},
  issue = {10},
  langid = {english},
  annotation = {37 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/5SR2E3CD/Huerta et al. - 2019 - Enabling real-time multi-messenger astrophysics di.pdf}
}

@incollection{huHeterogeneousGraphTransformer2020,
  title = {Heterogeneous {{Graph Transformer}}},
  booktitle = {Proceedings of {{The Web Conference}} 2020},
  author = {Hu, Ziniu and Dong, Yuxiao and Wang, Kuansan and Sun, Yizhou},
  date = {2020-04-20},
  pages = {2704--2710},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  url = {https://doi.org/10.1145/3366423.3380027},
  abstract = {Recent years have witnessed the emerging success of graph neural networks (GNNs) for modeling structured data. However, most GNNs are designed for homogeneous graphs, in which all nodes and edges belong to the same types, making it infeasible to represent heterogeneous structures. In this paper, we present the Heterogeneous Graph Transformer (HGT) architecture for modeling Web-scale heterogeneous graphs. To model heterogeneity, we design node- and edge-type dependent parameters to characterize the heterogeneous attention over each edge, empowering HGT to maintain dedicated representations for different types of nodes and edges. To handle Web-scale graph data, we design the heterogeneous mini-batch graph sampling algorithm—HGSampling—for efficient and scalable training. Extensive experiments on the Open Academic Graph of 179 million nodes and 2 billion edges show that the proposed HGT model consistently outperforms all the state-of-the-art GNN baselines by 9–21 on various downstream tasks. The dataset and source code of HGT are publicly available at https://github.com/acbull/pyHGT.},
  isbn = {978-1-4503-7023-3},
  keywords = {Computer Science - Machine Learning,Computer Science - Social and Information Networks,Graph Attention,Graph Embedding,Graph Neural Networks,Heterogeneous Information Networks,Representation Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/FT72WVT7/Hu et al. - 2020 - Heterogeneous Graph Transformer.pdf;/Users/Nasy/Zotero/storage/RI2SSALP/Hu et al. - 2020 - Heterogeneous Graph Transformer.pdf;/Users/Nasy/Zotero/storage/GUJDVITT/2003.html}
}

@inproceedings{husseinAreMetaPathsNecessary2018,
  title = {Are {{Meta-Paths Necessary}}? {{Revisiting Heterogeneous Graph Embeddings}}},
  shorttitle = {Are {{Meta-Paths Necessary}}?},
  booktitle = {Proceedings of the 27th {{ACM International Conference}} on {{Information}} and {{Knowledge Management}}},
  author = {Hussein, Rana and Yang, Dingqi and Cudré-Mauroux, Philippe},
  date = {2018-10-17},
  series = {{{CIKM}} '18},
  pages = {437--446},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3269206.3271777},
  url = {https://doi.org/10.1145/3269206.3271777},
  urldate = {2022-05-25},
  abstract = {The graph embedding paradigm projects nodes of a graph into a vector space, which can facilitate various downstream graph analysis tasks such as node classification and clustering. To efficiently learn node embeddings from a graph, graph embedding techniques usually preserve the proximity between node pairs sampled from the graph using random walks. In the context of a heterogeneous graph, which contains nodes from different domains, classical random walks are biased towards highly visible domains where nodes are associated with a dominant number of paths. To overcome this bias, existing heterogeneous graph embedding techniques typically rely on meta-paths (i.e., fixed sequences of node types) to guide random walks. However, using these meta-paths either requires prior knowledge from domain experts for optimal meta-path selection, or requires extended computations to combine all meta-paths shorter than a predefined length. In this paper, we propose an alternative solution that does not involve any meta-path. Specifically, we propose JUST, a heterogeneous graph embedding technique using random walks with JUmp and STay strategies to overcome the aforementioned bias in an more efficient manner. JUST can not only gracefully balance between homogeneous and heterogeneous edges, it can also balance the node distribution over different domains (i.e., node types). By conducting a thorough empirical evaluation of our method on three heterogeneous graph datasets, we show the superiority of our proposed technique. In particular, compared to a state-of-the-art heterogeneous graph embedding technique Hin2vec, which tries to optimally combine all meta-paths shorter than a predefined length, our technique yields better results in most experiments, with a dramatically reduced embedding learning time (about 3x speedup).},
  isbn = {978-1-4503-6014-2},
  keywords = {graph embedding,heterogeneous graph,random walk},
  annotation = {37 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/TY4EFJCM/Hussein et al. - 2018 - Are Meta-Paths Necessary Revisiting Heterogeneous.pdf}
}

@article{huthAntigenSpecificTCRSignatures2019,
  title = {Antigen-{{Specific TCR Signatures}} of {{Cytomegalovirus Infection}}},
  author = {Huth, Alina and Liang, Xiaoling and Krebs, Stefan and Blum, Helmut and Moosmann, Andreas},
  date = {2019-02-01},
  journaltitle = {J Immunol},
  volume = {202},
  number = {3},
  eprint = {30587531},
  eprinttype = {pmid},
  pages = {979--990},
  issn = {1550-6606},
  doi = {10.4049/jimmunol.1801401},
  abstract = {CMV is a prevalent human pathogen. The virus cannot be eliminated from the body, but is kept in check by CMV-specific T cells. Patients with an insufficient T cell response, such as transplant recipients, are at high risk of developing CMV disease. However, the CMV-specific T cell repertoire is complex, and it is not yet clear which T cells protect best against virus reactivation and disease. In this study, we present a highly resolved characterization of CMV-specific human CD8+ T cells based on enrichment by specific peptide stimulation and mRNA sequencing of their TCR β-chains (TCRβ). Our analysis included recently identified T cell epitopes restricted through HLA-C, whose presentation is resistant to viral immunomodulation, and well-studied HLA-B-restricted epitopes. In eight healthy virus carriers, we identified a total of 1052 CMV-specific TCRβ sequences. HLA-C-restricted, CMV-specific TCRβ clonotypes dominated the ex vivo T cell response and contributed the highest-frequency clonotype of the entire repertoire in two of eight donors. We analyzed sharing and similarity of CMV-specific TCRβ sequences and identified 63 public or related sequences belonging to 17 public TCRβ families. In our cohort, and in an independent cohort of 352 donors, the cumulative frequency of these public TCRβ family members was a highly discriminatory indicator of carrying both CMV infection and the relevant HLA type. Based on these findings, we propose CMV-specific TCRβ signatures as a biomarker for an antiviral T cell response to identify patients in need of treatment and to guide future development of immunotherapy.},
  langid = {english},
  keywords = {Antigens Viral,CD8-Positive T-Lymphocytes,Cells Cultured,Cytomegalovirus,Cytomegalovirus Infections,Epitopes T-Lymphocyte,High-Throughput Nucleotide Sequencing,Humans,Peptides,Receptors Antigen T-Cell alpha-beta,Transcriptome},
  annotation = {13 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/IVAAXBYI/Huth et al. - 2019 - Antigen-Specific TCR Signatures of Cytomegalovirus.pdf}
}

@article{huthAntigenSpecificTCRSignatures2019a,
  title = {Antigen-{{Specific TCR Signatures}} of {{Cytomegalovirus Infection}}},
  author = {Huth, Alina and Liang, Xiaoling and Krebs, Stefan and Blum, Helmut and Moosmann, Andreas},
  date = {2019-02-01},
  journaltitle = {J Immunol},
  volume = {202},
  number = {3},
  eprint = {30587531},
  eprinttype = {pmid},
  pages = {979--990},
  issn = {1550-6606},
  doi = {10.4049/jimmunol.1801401},
  abstract = {CMV is a prevalent human pathogen. The virus cannot be eliminated from the body, but is kept in check by CMV-specific T cells. Patients with an insufficient T cell response, such as transplant recipients, are at high risk of developing CMV disease. However, the CMV-specific T cell repertoire is complex, and it is not yet clear which T cells protect best against virus reactivation and disease. In this study, we present a highly resolved characterization of CMV-specific human CD8+ T cells based on enrichment by specific peptide stimulation and mRNA sequencing of their TCR β-chains (TCRβ). Our analysis included recently identified T cell epitopes restricted through HLA-C, whose presentation is resistant to viral immunomodulation, and well-studied HLA-B-restricted epitopes. In eight healthy virus carriers, we identified a total of 1052 CMV-specific TCRβ sequences. HLA-C-restricted, CMV-specific TCRβ clonotypes dominated the ex vivo T cell response and contributed the highest-frequency clonotype of the entire repertoire in two of eight donors. We analyzed sharing and similarity of CMV-specific TCRβ sequences and identified 63 public or related sequences belonging to 17 public TCRβ families. In our cohort, and in an independent cohort of 352 donors, the cumulative frequency of these public TCRβ family members was a highly discriminatory indicator of carrying both CMV infection and the relevant HLA type. Based on these findings, we propose CMV-specific TCRβ signatures as a biomarker for an antiviral T cell response to identify patients in need of treatment and to guide future development of immunotherapy.},
  langid = {english},
  keywords = {Antigens Viral,CD8-Positive T-Lymphocytes,Cells Cultured,Cytomegalovirus,Cytomegalovirus Infections,Epitopes T-Lymphocyte,High-Throughput Nucleotide Sequencing,Humans,Peptides,Receptors Antigen T-Cell alpha-beta,Transcriptome},
  annotation = {13 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/MDMEGPDD/Huth et al. - 2019 - Antigen-Specific TCR Signatures of Cytomegalovirus.pdf}
}

@inproceedings{hwangSelfsupervisedAuxiliaryLearning2020,
  title = {Self-Supervised {{Auxiliary Learning}} with {{Meta-paths}} for {{Heterogeneous Graphs}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Hwang, Dasol and Park, Jinyoung and Kwon, Sunyoung and Kim, KyungMin and Ha, Jung-Woo and Kim, Hyunwoo J},
  date = {2020},
  volume = {33},
  pages = {10294--10305},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2020/hash/74de5f915765ea59816e770a8e686f38-Abstract.html},
  urldate = {2022-05-26},
  abstract = {Graph neural networks have shown superior performance in a wide range of applications providing a powerful representation of graph-structured data.  Recent works show that the representation can be further improved by auxiliary tasks. However, the auxiliary tasks for heterogeneous graphs, which contain rich semantic information with various types of nodes and edges, have less explored in the literature. In this paper, to learn graph neural networks on heterogeneous graphs we propose a novel self-supervised auxiliary learning method using meta paths, which are composite relations of multiple edge types. Our proposed method is learning to learn a primary task by predicting meta-paths as auxiliary tasks. This can be viewed as a type of meta-learning. The proposed method can identify an effective combination of auxiliary tasks and automatically balance them to improve the primary task. Our methods can be applied to any graph neural networks in a plug-in manner without manual labeling or additional data. The experiments demonstrate that the proposed method consistently improves the performance of link prediction and node classification on heterogeneous graphs.},
  file = {/Users/Nasy/Zotero/storage/6FD63M5S/Hwang et al. - 2020 - Self-supervised Auxiliary Learning with Meta-paths.pdf}
}

@online{HyperbolicGeometryCannon,
  title = {Hyperbolic {{Geometry}} by {{J}}.{{W}}. {{Cannon}}, {{W}}.{{J}}. {{Floyd}}, {{R}}. {{Kenyon}}, {{W}}.{{R}}. {{Parry}} - {{Download}} Link},
  url = {https://www.e-booksdirectory.com/details.php?ebook=8005},
  urldate = {2023-03-10},
  file = {/Users/Nasy/Zotero/storage/DJ4835ZZ/details.html}
}

@online{iedbIEDBOrgFree2022,
  title = {{{IEDB}}.Org: {{Free}} Epitope Database and Prediction Resource},
  shorttitle = {{{IEDB}}.Org},
  author = {IEDB},
  date = {2022},
  url = {http://www.iedb.org},
  urldate = {2022-03-19},
  abstract = {Free resource for searching and exporting immune epitopes. Includes more than 95\% of all published infectious disease, allergy, autoimmune, and transplant epitope data.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/N6J8ZCEY/www.iedb.org.html}
}

@online{inoueDataAugmentationPairing2018,
  title = {Data {{Augmentation}} by {{Pairing Samples}} for {{Images Classification}}},
  author = {Inoue, Hiroshi},
  date = {2018-04-11},
  eprint = {1801.02929},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.1801.02929},
  url = {http://arxiv.org/abs/1801.02929},
  urldate = {2024-10-31},
  abstract = {Data augmentation is a widely used technique in many machine learning tasks, such as image classification, to virtually enlarge the training dataset size and avoid overfitting. Traditional data augmentation techniques for image classification tasks create new samples from the original training data by, for example, flipping, distorting, adding a small amount of noise to, or cropping a patch from an original image. In this paper, we introduce a simple but surprisingly effective data augmentation technique for image classification tasks. With our technique, named SamplePairing, we synthesize a new sample from one image by overlaying another image randomly chosen from the training data (i.e., taking an average of two images for each pixel). By using two images randomly selected from the training set, we can generate \$N\textasciicircum 2\$ new samples from \$N\$ training samples. This simple data augmentation technique significantly improved classification accuracy for all the tested datasets; for example, the top-1 error rate was reduced from 33.5\% to 29.0\% for the ILSVRC 2012 dataset with GoogLeNet and from 8.22\% to 6.93\% in the CIFAR-10 dataset. We also show that our SamplePairing technique largely improved accuracy when the number of samples in the training set was very small. Therefore, our technique is more valuable for tasks with a limited amount of training data, such as medical imaging tasks.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/28CZHIPG/Inoue - 2018 - Data Augmentation by Pairing Samples for Images Classification.pdf;/Users/Nasy/Zotero/storage/N7PXHXYC/1801.html}
}

@online{izadiOptimizationGraphNeural2020,
  title = {Optimization of {{Graph Neural Networks}} with {{Natural Gradient Descent}}},
  author = {Izadi, Mohammad Rasool and Fang, Yihao and Stevenson, Robert and Lin, Lizhen},
  date = {2020-08-21},
  eprint = {2008.09624},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/2008.09624},
  urldate = {2023-03-19},
  abstract = {In this work, we propose to employ information-geometric tools to optimize a graph neural network architecture such as the graph convolutional networks. More specifically, we develop optimization algorithms for the graph-based semi-supervised learning by employing the natural gradient information in the optimization process. This allows us to efficiently exploit the geometry of the underlying statistical model or parameter space for optimization and inference. To the best of our knowledge, this is the first work that has utilized the natural gradient for the optimization of graph neural networks that can be extended to other semi-supervised problems. Efficient computations algorithms are developed and extensive numerical studies are conducted to demonstrate the superior performance of our algorithms over existing algorithms such as ADAM and SGD.},
  pubstate = {prepublished},
  version = {1},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/L3CDH8MU/Izadi et al. - 2020 - Optimization of Graph Neural Networks with Natural.pdf;/Users/Nasy/Zotero/storage/CEBL9C7R/2008.html}
}

@article{jespersenAntibodySpecificBCell2019,
  title = {Antibody {{Specific B-Cell Epitope Predictions}}: {{Leveraging Information From Antibody-Antigen Protein Complexes}}},
  shorttitle = {Antibody {{Specific B-Cell Epitope Predictions}}},
  author = {Jespersen, Martin Closter and Mahajan, Swapnil and Peters, Bjoern and Nielsen, Morten and Marcatili, Paolo},
  date = {2019},
  journaltitle = {Frontiers in Immunology},
  volume = {10},
  issn = {1664-3224},
  url = {https://www.frontiersin.org/article/10.3389/fimmu.2019.00298},
  abstract = {B-cells can neutralize pathogenic molecules by targeting them with extreme specificity using receptors secreted or expressed on their surface (antibodies). This is achieved via molecular interactions between the paratope (i.e., the antibody residues involved in the binding) and the interacting region (epitope) of its target molecule (antigen). Discerning the rules that define this specificity would have profound implications for our understanding of humoral immunogenicity and its applications. The aim of this work is to produce improved, antibody-specific epitope predictions by exploiting features derived from the antigens and their cognate antibodies structures, and combining them using statistical and machine learning algorithms. We have identified several geometric and physicochemical features that are correlated in interacting paratopes and epitopes, used them to develop a Monte Carlo algorithm to generate putative epitopes-paratope pairs, and train a machine-learning model to score them. We show that, by including the structural and physicochemical properties of the paratope, we improve the prediction of the target of a given B-cell receptor. Moreover, we demonstrate a gain in predictive power both in terms of identifying the cognate antigen target for a given antibody and the antibody target for a given antigen, exceeding the results of other available tools.},
  file = {/Users/Nasy/Zotero/storage/IIU5L3ME/Jespersen et al. - 2019 - Antibody Specific B-Cell Epitope Predictions Leve.pdf}
}

@article{jiangGTEGraphLearning2024,
  title = {{{GTE}}: A Graph Learning Framework for Prediction of {{T-cell}} Receptors and Epitopes Binding Specificity},
  shorttitle = {{{GTE}}},
  author = {Jiang, Feng and Guo, Yuzhi and Ma, Hehuan and Na, Saiyang and Zhong, Wenliang and Han, Yi and Wang, Tao and Huang, Junzhou},
  date = {2024-07-01},
  journaltitle = {Briefings in Bioinformatics},
  volume = {25},
  number = {4},
  pages = {bbae343},
  issn = {1477-4054},
  doi = {10.1093/bib/bbae343},
  url = {https://doi.org/10.1093/bib/bbae343},
  urldate = {2024-10-30},
  abstract = {The interaction between T-cell receptors (TCRs) and peptides (epitopes) presented by major histocompatibility complex molecules (MHC) is fundamental to the immune response. Accurate prediction of TCR–epitope interactions is crucial for advancing the understanding of various diseases and their prevention and treatment. Existing methods primarily rely on sequence-based approaches, overlooking the inherent topology structure of TCR–epitope interaction networks. In this study, we present \$GTE\$, a novel heterogeneous Graph neural network model based on inductive learning to capture the topological structure between TCRs and Epitopes. Furthermore, we address the challenge of constructing negative samples within the graph by proposing a dynamic edge update strategy, enhancing model learning with the nonbinding TCR–epitope pairs. Additionally, to overcome data imbalance, we adapt the Deep AUC Maximization strategy to the graph domain. Extensive experiments are conducted on four public datasets to demonstrate the superiority of exploring underlying topological structures in predicting TCR–epitope interactions, illustrating the benefits of delving into complex molecular networks. The implementation code and data are available at https://github.com/uta-smile/GTE.},
  file = {/Users/Nasy/Zotero/storage/ESMNPHEF/Jiang et al. - 2024 - GTE a graph learning framework for prediction of T-cell receptors and epitopes binding specificity.pdf;/Users/Nasy/Zotero/storage/A5ILAC6L/7713742.html}
}

@inproceedings{jiangRobustPreTrainingAdversarial2020,
  title = {Robust {{Pre-Training}} by {{Adversarial Contrastive Learning}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Jiang, Ziyu and Chen, Tianlong and Chen, Ting and Wang, Zhangyang},
  editor = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M. F. and Lin, H.},
  date = {2020},
  volume = {33},
  pages = {16199--16210},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2020/file/ba7e36c43aff315c00ec2b8625e3b719-Paper.pdf},
  keywords = {nosource}
}

@inproceedings{jianTCellReceptorPeptideInteraction2022,
  title = {T-{{Cell Receptor-Peptide Interaction Prediction}} with {{Physical Model Augmented Pseudo-Labeling}}},
  booktitle = {Proceedings of the 28th {{ACM SIGKDD Conference}} on {{Knowledge Discovery}} and {{Data Mining}}},
  author = {Jian, Yiren and Kruus, Erik and Min, Martin Renqiang},
  date = {2022-08-14},
  series = {{{KDD}} '22},
  pages = {3090--3097},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3534678.3539075},
  url = {https://doi.org/10.1145/3534678.3539075},
  abstract = {Predicting the interactions between T-cell receptors (TCRs) and peptides is crucial for the development of personalized medicine and targeted vaccine in immunotherapy. Current datasets for training deep learning models of this purpose remain constrained without diverse TCRs and peptides. To combat the data scarcity issue presented in the current datasets, we propose to extend the training dataset by physical modeling of TCR-peptide pairs. Specifically, we compute the docking energies between auxiliary unknown TCR-peptide pairs as surrogate training labels. Then, we use these extended example-label pairs to train our model in a supervised fashion. Finally, we find that the AUC score for the prediction of the model can be further improved by pseudo-labeling of such unknown TCR-peptide pairs (by a trained teacher model), and re-training the model with those pseudo-labeled TCR-peptide pairs. Our proposed method that trains the deep neural network with physical modeling and data-augmented pseudo-labeling improves over baselines in the available two datasets. We also introduce a new dataset that contains over 80,000 unknown TCR-peptide pairs with docking energy scores.},
  isbn = {978-1-4503-9385-0},
  keywords = {deep neural network,docking energy,peptide recognition,physical modeling,pseudo-labeling,T-cell receptors},
  annotation = {0 citations (Crossref) [2022-08-24]},
  file = {/Users/Nasy/Zotero/storage/SE2CNSPP/Jian et al. - 2022 - T-Cell Receptor-Peptide Interaction Prediction wit.pdf}
}

@inproceedings{jianTCellReceptorPeptideInteraction2022a,
  title = {T-{{Cell Receptor-Peptide Interaction Prediction}} with {{Physical Model Augmented Pseudo-Labeling}}},
  booktitle = {Proceedings of the 28th {{ACM SIGKDD Conference}} on {{Knowledge Discovery}} and {{Data Mining}}},
  author = {Jian, Yiren and Kruus, Erik and Min, Martin Renqiang},
  date = {2022-08-14},
  series = {{{KDD}} '22},
  pages = {3090--3097},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3534678.3539075},
  url = {https://doi.org/10.1145/3534678.3539075},
  urldate = {2022-09-16},
  abstract = {Predicting the interactions between T-cell receptors (TCRs) and peptides is crucial for the development of personalized medicine and targeted vaccine in immunotherapy. Current datasets for training deep learning models of this purpose remain constrained without diverse TCRs and peptides. To combat the data scarcity issue presented in the current datasets, we propose to extend the training dataset by physical modeling of TCR-peptide pairs. Specifically, we compute the docking energies between auxiliary unknown TCR-peptide pairs as surrogate training labels. Then, we use these extended example-label pairs to train our model in a supervised fashion. Finally, we find that the AUC score for the prediction of the model can be further improved by pseudo-labeling of such unknown TCR-peptide pairs (by a trained teacher model), and re-training the model with those pseudo-labeled TCR-peptide pairs. Our proposed method that trains the deep neural network with physical modeling and data-augmented pseudo-labeling improves over baselines in the available two datasets. We also introduce a new dataset that contains over 80,000 unknown TCR-peptide pairs with docking energy scores.},
  isbn = {978-1-4503-9385-0},
  keywords = {deep neural network,docking energy,peptide recognition,physical modeling,pseudo-labeling,T-cell receptors},
  annotation = {0 citations (Crossref) [2022-09-16]}
}

@article{jing-minNewStellarSpectral2020,
  title = {A {{New Stellar Spectral Feature Extraction Method Based}} on {{Two-dimensional Fourier Spectrum Image}} and {{Its Application}} in the {{Stellar Spectral Classification Based}} on {{Deep Network}}},
  author = {Jing-min, ZHANG and Chen-ye, MA and Lu, Wang and Li-ting, Du and Ting-ting, XU and Lin-pin, Ai and Wei-hong, ZHOU},
  date = {2020-07-01},
  journaltitle = {Chinese Astronomy and Astrophysics},
  volume = {44},
  number = {3},
  pages = {334--344},
  issn = {0275-1062},
  doi = {10.1016/j.chinastron.2020.08.004},
  url = {https://www.sciencedirect.com/science/article/pii/S0275106220300771},
  urldate = {2022-07-29},
  abstract = {The classification of celestial spectra is one of the important contents of astronomical research. The key is to select and extract the most effective feature for classification from spectra data. In this paper, we propose a new feature extraction method for astronomical spectra based on two-dimensional Fourier spectrum image, and apply the method to the classification study of LAMOST (the Large Sky Area Multi-Object Fiber Spectroscopic Telescope) stellar spectral data. The spectra data are from LAMOST Data Release 5 (DR5). We select 30000 F, G, and K types of spectra data. The short-time Fourier transform (STFT) is used to transform the one-dimensional spectra data into two-dimensional Fourier spectrum images. We classify and test these two-dimensional Fourier spectrum images with a module based on deep convolution network, and the classification accuracy rate is 92.90\%. The experimental result shows that the LAMOST stellar spectra data can be transformed into the two-dimensional Fourier spectrum images by the STFT. These spectral images inform new features, and build a new feature space, which is effective for classification. The method is a fully new attempt in spectra classification, which has certainly a pioneering significance for the classification and mining of massive celestial spectra.},
  langid = {english},
  keywords = {Methods: data analysis,Stars: fundamental parameters,techniques: spectral analysis},
  annotation = {1 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/THR74U5R/S0275106220300771.html}
}

@inproceedings{jingAmalgamatingKnowledgeHeterogeneous2021,
  title = {Amalgamating {{Knowledge From Heterogeneous Graph Neural Networks}}},
  author = {Jing, Yongcheng and Yang, Yiding and Wang, Xinchao and Song, Mingli and Tao, Dacheng},
  date = {2021},
  pages = {15709--15718},
  url = {https://openaccess.thecvf.com/content/CVPR2021/html/Jing_Amalgamating_Knowledge_From_Heterogeneous_Graph_Neural_Networks_CVPR_2021_paper.html},
  urldate = {2022-05-26},
  eventtitle = {Proceedings of the {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/MHM9ZTQ4/Jing et al. - 2021 - Amalgamating Knowledge From Heterogeneous Graph Ne.pdf;/Users/Nasy/Zotero/storage/PYSKTBHL/Jing_Amalgamating_Knowledge_From_Heterogeneous_Graph_Neural_Networks_CVPR_2021_paper.html}
}

@inproceedings{jinIterativeRefinementGraph2022,
  title = {Iterative {{Refinement Graph Neural Network}} for {{Antibody Sequence-Structure Co-design}}},
  author = {Jin, Wengong and Wohlwend, Jeremy and Barzilay, Regina and Jaakkola, Tommi S.},
  date = {2022-03-10},
  url = {https://openreview.net/forum?id=LI2bhrE_2A},
  urldate = {2022-07-30},
  abstract = {Antibodies are versatile proteins that bind to pathogens like viruses and stimulate the adaptive immune system. The specificity of antibody binding is determined by complementarity-determining...},
  eventtitle = {International {{Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/CZI8VHGX/Jin et al. - 2022 - Iterative Refinement Graph Neural Network for Anti.pdf;/Users/Nasy/Zotero/storage/LBCQVJCG/forum.html}
}

@article{joglekarCellAntigenDiscovery2019,
  title = {T Cell Antigen Discovery via Signaling and Antigen-Presenting Bifunctional Receptors},
  author = {Joglekar, Alok V. and Leonard, Michael T. and Jeppson, John D. and Swift, Margaret and Li, Guideng and Wong, Stephanie and Peng, Songming and Zaretsky, Jesse M. and Heath, James R. and Ribas, Antoni and Bethune, Michael T. and Baltimore, David},
  date = {2019-02},
  journaltitle = {Nat Methods},
  volume = {16},
  number = {2},
  eprint = {30700902},
  eprinttype = {pmid},
  pages = {191--198},
  issn = {1548-7105},
  doi = {10.1038/s41592-018-0304-8},
  abstract = {CD8+ T cells recognize and eliminate tumors in an antigen-specific manner. Despite progress in characterizing the antitumor T cell repertoire and function, the identification of target antigens remains a challenge. Here we describe the use of chimeric receptors called signaling and antigen-presenting bifunctional receptors (SABRs) in a cell-based platform for T cell receptor (TCR) antigen discovery. SABRs present an extracellular complex comprising a peptide and major histocompatibility complex (MHC), and induce intracellular signaling via a TCR-like signal after binding with a cognate TCR. We devised a strategy for antigen discovery using SABR libraries to screen thousands of antigenic epitopes. We validated this platform by identifying the targets recognized by public TCRs of known specificities. Moreover, we extended this approach for personalized neoantigen discovery.},
  langid = {english},
  pmcid = {PMC6755906},
  keywords = {Antigen Presentation,Antigen-Presenting Cells,Antigens,Antigens CD,Antigens Differentiation T-Lymphocyte,CD8-Positive T-Lymphocytes,Cloning Molecular,Coculture Techniques,Epitopes,False Positive Reactions,Gene Library,Green Fluorescent Proteins,HEK293 Cells,Humans,Immunotherapy,Jurkat Cells,K562 Cells,Lectins C-Type,Major Histocompatibility Complex,Oligonucleotides,Peptides,Receptors Antigen T-Cell,Signal Transduction},
  annotation = {64 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/YGC7REV5/Joglekar et al. - 2019 - T cell antigen discovery via signaling and antigen.pdf}
}

@article{jokinenPredictingRecognitionCell2021,
  title = {Predicting Recognition between {{T}} Cell Receptors and Epitopes with {{TCRGP}}},
  author = {Jokinen, Emmi and Huuhtanen, Jani and Mustjoki, Satu and Heinonen, Markus and Lähdesmäki, Harri},
  date = {2021-03-25},
  journaltitle = {PLOS Computational Biology},
  volume = {17},
  number = {3},
  pages = {e1008814},
  publisher = {Public Library of Science},
  issn = {1553-7358},
  doi = {10.1371/journal.pcbi.1008814},
  url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1008814},
  urldate = {2022-05-14},
  abstract = {Adaptive immune system uses T cell receptors (TCRs) to recognize pathogens and to consequently initiate immune responses. TCRs can be sequenced from individuals and methods analyzing the specificity of the TCRs can help us better understand individuals’ immune status in different disorders. For this task, we have developed TCRGP, a novel Gaussian process method that predicts if TCRs recognize specified epitopes. TCRGP can utilize the amino acid sequences of the complementarity determining regions (CDRs) from TCRα and TCRβ chains and learn which CDRs are important in recognizing different epitopes. Our comprehensive evaluation with epitope-specific TCR sequencing data shows that TCRGP achieves on average higher prediction accuracy in terms of AUROC score than existing state-of-the-art methods in epitope-specificity predictions. We also propose a novel analysis approach for combined single-cell RNA and TCRαβ (scRNA+TCRαβ) sequencing data by quantifying epitope-specific TCRs with TCRGP and identify HBV-epitope specific T cells and their transcriptomic states in hepatocellular carcinoma patients.},
  langid = {english},
  keywords = {Epstein-Barr virus,HIV-1,Influenza A virus,Learning curves,Major histocompatibility complex,Protein sequencing,Sequence alignment,T cell receptors},
  annotation = {17 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/AF65HA2E/Jokinen et al. - 2021 - Predicting recognition between T cell receptors an.pdf;/Users/Nasy/Zotero/storage/HAMQTRUE/article.html}
}

@unpublished{jonesPhotometricRedshiftsCosmology2022,
  title = {Photometric {{Redshifts}} for {{Cosmology}}: {{Improving Accuracy}} and {{Uncertainty Estimates Using Bayesian Neural Networks}}},
  shorttitle = {Photometric {{Redshifts}} for {{Cosmology}}},
  author = {Jones, Evan and Do, Tuan and Boscoe, Bernie and Wan, Yujie and Nguyen, Zooey and Singal, Jack},
  date = {2022-02-14},
  eprint = {2202.07121},
  eprinttype = {arXiv},
  eprintclass = {astro-ph},
  url = {http://arxiv.org/abs/2202.07121},
  urldate = {2022-02-16},
  abstract = {We present results exploring the role that probabilistic deep learning models can play in cosmology from large scale astronomical surveys through estimating the distances to galaxies (redshifts) from photometry. Due to the massive scale of data coming from these new and upcoming sky surveys, machine learning techniques using galaxy photometry are increasingly adopted to predict galactic redshifts which are important for inferring cosmological parameters such as the nature of dark energy. Associated uncertainty estimates are also critical measurements, however, common machine learning methods typically provide only point estimates and lack uncertainty information as outputs. We turn to Bayesian neural networks (BNNs) as a promising way to provide accurate predictions of redshift values. We have compiled a new galaxy training dataset from the Hyper Suprime-Cam Survey, designed to mimic large surveys, but over a smaller portion of the sky. We evaluate the performance and accuracy of photometric redshift (photo-z) predictions from photometry using machine learning, astronomical and probabilistic metrics. We find that while the Bayesian neural network did not perform as well as non-Bayesian neural networks if evaluated solely by point estimate photo-z values, BNNs can provide uncertainty estimates that are necessary for cosmology},
  keywords = {Astrophysics - Astrophysics of Galaxies,Astrophysics - Cosmology and Nongalactic Astrophysics},
  file = {/Users/Nasy/Zotero/storage/TLRH8ND4/Jones et al. - 2022 - Photometric Redshifts for Cosmology Improving Acc.pdf;/Users/Nasy/Zotero/storage/DZZN25LX/2202.html}
}

@software{jordanKellerJordanModdednanogpt2024,
  title = {{{KellerJordan}}/Modded-Nanogpt},
  author = {Jordan, Keller},
  date = {2024-11-07T23:27:01Z},
  origdate = {2024-06-01T06:01:50Z},
  url = {https://github.com/KellerJordan/modded-nanogpt},
  urldate = {2024-11-08},
  abstract = {NanoGPT (124M) quality in 8.2 minutes}
}

@article{jumperHighlyAccurateProtein2021,
  title = {Highly Accurate Protein Structure Prediction with {{AlphaFold}}},
  author = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and Žídek, Augustin and Potapenko, Anna and Bridgland, Alex and Meyer, Clemens and Kohl, Simon A. A. and Ballard, Andrew J. and Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov, Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor and Petersen, Stig and Reiman, David and Clancy, Ellen and Zielinski, Michal and Steinegger, Martin and Pacholska, Michalina and Berghammer, Tamas and Bodenstein, Sebastian and Silver, David and Vinyals, Oriol and Senior, Andrew W. and Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
  date = {2021-08},
  journaltitle = {Nature},
  volume = {596},
  number = {7873},
  pages = {583--589},
  publisher = {Nature Publishing Group},
  issn = {1476-4687},
  doi = {10.1038/s41586-021-03819-2},
  url = {https://www.nature.com/articles/s41586-021-03819-2},
  urldate = {2022-06-17},
  abstract = {Proteins are essential to life, and understanding their structure can facilitate a mechanistic understanding of their function. Through an enormous experimental effort1–4, the structures of around 100,000 unique proteins have been determined5, but this represents a small fraction of the billions of known protein sequences6,7. Structural coverage is bottlenecked by the months to years of painstaking effort required to determine a single protein structure. Accurate computational approaches are needed to address this gap and to enable large-scale structural bioinformatics. Predicting the three-dimensional structure that a protein will adopt based solely on its amino acid sequence—the structure prediction component of the ‘protein folding problem’8—has been an important open research problem for more than 50~years9. Despite recent progress10–14, existing methods fall far~short of atomic accuracy, especially when no homologous structure is available. Here we provide the first computational method that can regularly predict protein structures with atomic accuracy even in cases in which no similar structure is known. We validated an entirely redesigned version of our neural network-based model, AlphaFold, in the challenging 14th Critical Assessment of protein Structure Prediction (CASP14)15, demonstrating accuracy competitive with experimental structures in a majority of cases and greatly outperforming other methods. Underpinning the latest version of AlphaFold is a novel machine learning approach that incorporates physical and biological knowledge about protein structure, leveraging multi-sequence alignments, into the design of the deep learning algorithm.},
  issue = {7873},
  langid = {english},
  keywords = {Computational biophysics,Machine learning,Protein structure predictions,Structural biology},
  annotation = {3675 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/DAM89FSB/Jumper et al. - 2021 - Highly accurate protein structure prediction with .pdf;/Users/Nasy/Zotero/storage/ZEC7YBDR/s41586-021-03819-2.html}
}

@article{jumperHighlyAccurateProtein2021a,
  title = {Highly Accurate Protein Structure Prediction with {{AlphaFold}}},
  author = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and Žídek, Augustin and Potapenko, Anna and Bridgland, Alex and Meyer, Clemens and Kohl, Simon A. A. and Ballard, Andrew J. and Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov, Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor and Petersen, Stig and Reiman, David and Clancy, Ellen and Zielinski, Michal and Steinegger, Martin and Pacholska, Michalina and Berghammer, Tamas and Bodenstein, Sebastian and Silver, David and Vinyals, Oriol and Senior, Andrew W. and Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
  date = {2021-08},
  journaltitle = {Nature},
  volume = {596},
  number = {7873},
  pages = {583--589},
  publisher = {Nature Publishing Group},
  issn = {1476-4687},
  doi = {10.1038/s41586-021-03819-2},
  url = {https://www.nature.com/articles/s41586-021-03819-2},
  urldate = {2024-10-30},
  abstract = {Proteins are essential to life, and understanding their structure can facilitate a mechanistic understanding of their function. Through an enormous experimental effort1–4, the structures of around 100,000 unique proteins have been determined5, but this represents a small fraction of the billions of known protein sequences6,7. Structural coverage is bottlenecked by the months to years of painstaking effort required to determine a single protein structure. Accurate computational approaches are needed to address this gap and to enable large-scale structural bioinformatics. Predicting the three-dimensional structure that a protein will adopt based solely on its amino acid sequence—the structure prediction component of the ‘protein folding problem’8—has been an important open research problem for more than 50~years9. Despite recent progress10–14, existing methods fall far~short of atomic accuracy, especially when no homologous structure is available. Here we provide the first computational method that can regularly predict protein structures with atomic accuracy even in cases in which no similar structure is known. We validated an entirely redesigned version of our neural network-based model, AlphaFold, in the challenging 14th Critical Assessment of protein Structure Prediction (CASP14)15, demonstrating accuracy competitive with experimental structures in a majority of cases and greatly outperforming other methods. Underpinning the latest version of AlphaFold is a novel machine learning approach that incorporates physical and biological knowledge about protein structure, leveraging multi-sequence alignments, into the design of the deep learning algorithm.},
  langid = {english},
  keywords = {Computational biophysics,Machine learning,Protein structure predictions,Structural biology},
  file = {/Users/Nasy/Zotero/storage/AXDYFWEH/Jumper et al. - 2021 - Highly accurate protein structure prediction with AlphaFold.pdf}
}

@online{kalamkarStudyBFLOAT16Deep2019,
  title = {A {{Study}} of {{BFLOAT16}} for {{Deep Learning Training}}},
  author = {Kalamkar, Dhiraj and Mudigere, Dheevatsa and Mellempudi, Naveen and Das, Dipankar and Banerjee, Kunal and Avancha, Sasikanth and Vooturi, Dharma Teja and Jammalamadaka, Nataraj and Huang, Jianyu and Yuen, Hector and Yang, Jiyan and Park, Jongsoo and Heinecke, Alexander and Georganas, Evangelos and Srinivasan, Sudarshan and Kundu, Abhisek and Smelyanskiy, Misha and Kaul, Bharat and Dubey, Pradeep},
  date = {2019-06-13},
  eprint = {1905.12322},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1905.12322},
  url = {http://arxiv.org/abs/1905.12322},
  urldate = {2023-01-19},
  abstract = {This paper presents the first comprehensive empirical study demonstrating the efficacy of the Brain Floating Point (BFLOAT16) half-precision format for Deep Learning training across image classification, speech recognition, language modeling, generative networks and industrial recommendation systems. BFLOAT16 is attractive for Deep Learning training for two reasons: the range of values it can represent is the same as that of IEEE 754 floating-point format (FP32) and conversion to/from FP32 is simple. Maintaining the same range as FP32 is important to ensure that no hyper-parameter tuning is required for convergence; e.g., IEEE 754 compliant half-precision floating point (FP16) requires hyper-parameter tuning. In this paper, we discuss the flow of tensors and various key operations in mixed precision training, and delve into details of operations, such as the rounding modes for converting FP32 tensors to BFLOAT16. We have implemented a method to emulate BFLOAT16 operations in Tensorflow, Caffe2, IntelCaffe, and Neon for our experiments. Our results show that deep learning training using BFLOAT16 tensors achieves the same state-of-the-art (SOTA) results across domains as FP32 tensors in the same number of iterations and with no changes to hyper-parameters.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/VB37FBBD/Kalamkar et al. - 2019 - A Study of BFLOAT16 for Deep Learning Training.pdf;/Users/Nasy/Zotero/storage/28IT2UPQ/1905.html}
}

@unpublished{karimAdversarialTrainingFace2021,
  title = {Adversarial {{Training}} for {{Face Recognition Systems}} Using {{Contrastive Adversarial Learning}} and {{Triplet Loss Fine-tuning}}},
  author = {Karim, Nazmul and Khalid, Umar and Meeker, Nick and Samarasinghe, Sarinda},
  date = {2021-10-09},
  eprint = {2110.04459},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/2110.04459},
  urldate = {2022-03-17},
  abstract = {Though much work has been done in the domain of improving the adversarial robustness of facial recognition systems, a surprisingly small percentage of it has focused on self-supervised approaches. In this work, we present an approach that combines Ad-versarial Pre-Training with Triplet Loss AdversarialFine-Tuning. We compare our methods with the pre-trained ResNet50 model that forms the backbone of FaceNet, finetuned on our CelebA dataset. Through comparing adversarial robustness achieved without adversarial training, with triplet loss adversarial training, and our contrastive pre-training combined with triplet loss adversarial fine-tuning, we find that our method achieves comparable results with far fewer epochs re-quired during fine-tuning. This seems promising, increasing the training time for fine-tuning should yield even better results. In addition to this, a modified semi-supervised experiment was conducted, which demonstrated the improvement of contrastive adversarial training with the introduction of small amounts of labels.},
  keywords = {Computer Science - Computer Vision and Pattern Recognition},
  file = {/Users/Nasy/Zotero/storage/IR8R5AE9/Karim et al. - 2021 - Adversarial Training for Face Recognition Systems .pdf;/Users/Nasy/Zotero/storage/78FC87QM/2110.html}
}

@unpublished{karpovPhysicsInformedMachineLearning2022,
  title = {Physics-{{Informed Machine Learning}} for {{Modeling Turbulence}} in {{Supernovae}}},
  author = {Karpov, Platon I. and Huang, Chengkun and Sitdikov, Iskandar and Fryer, Chris L. and Woosley, Stan and Pilania, Ghanshyam},
  date = {2022-05-17},
  eprint = {2205.08663},
  eprinttype = {arXiv},
  eprintclass = {astro-ph, physics:physics},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2205.08663},
  url = {http://arxiv.org/abs/2205.08663},
  urldate = {2022-05-19},
  abstract = {Turbulence plays an integral role in astrophysical phenomena, including core-collapse supernovae (CCSN). Unfortunately, current simulations must resort to using subgrid models for turbulence treatment, as direct numerical simulations (DNS) are too expensive to run. However, subgrid models used in CCSN simulations lack accuracy compared to DNS results. Recently, Machine Learning (ML) has shown impressive prediction capability for turbulence closure. We have developed a physics-informed, deep convolutional neural network (CNN) to preserve the realizability condition of Reynolds stress that is necessary for accurate turbulent pressure prediction. The applicability of the ML model was tested for magnetohydrodynamic (MHD) turbulence subgrid modeling in both stationary and dynamic regimes. Our future goal is to utilize our ML methodology within the MHD CCSN framework to investigate the effects of accurately-modeled turbulence on the explosion rate of these events.},
  keywords = {Astrophysics - High Energy Astrophysical Phenomena,Physics - Computational Physics},
  file = {/Users/Nasy/Zotero/storage/6BFB5HFI/Karpov et al. - 2022 - Physics-Informed Machine Learning for Modeling Tur.pdf;/Users/Nasy/Zotero/storage/PRC3WMP5/2205.html}
}

@article{kimTSpredRobustPrediction2024,
  title = {{{TSpred}}: A Robust Prediction Framework for {{TCR}}–Epitope Interactions Using Paired Chain {{TCR}} Sequence Data},
  shorttitle = {{{TSpred}}},
  author = {Kim, Ha Young and Kim, Sungsik and Park, Woong-Yang and Kim, Dongsup},
  date = {2024-08-01},
  journaltitle = {Bioinformatics},
  volume = {40},
  number = {8},
  pages = {btae472},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btae472},
  url = {https://doi.org/10.1093/bioinformatics/btae472},
  urldate = {2024-10-07},
  abstract = {Prediction of T-cell receptor (TCR)–epitope interactions is important for many applications in biomedical research, such as cancer immunotherapy and vaccine design. The prediction of TCR–epitope interactions remains challenging especially for novel epitopes, due to the scarcity of available data.We propose TSpred, a new deep learning approach for the pan-specific prediction of TCR binding specificity based on paired chain TCR data. We develop a robust model that generalizes well to unseen epitopes by combining the predictive power of CNN and the attention mechanism. In particular, we design a reciprocal attention mechanism which focuses on extracting the patterns underlying TCR–epitope interactions. Upon a comprehensive evaluation of our model, we find that TSpred achieves state-of-the-art performances in both seen and unseen epitope specificity prediction tasks. Also, compared to other predictors, TSpred is more robust to bias related to peptide imbalance in the dataset. In addition, the reciprocal attention component of our model allows for model interpretability by capturing structurally important binding regions. Results indicate that TSpred is a robust and reliable method for the task of TCR–epitope binding prediction.Source code is available at https://github.com/ha01994/TSpred.},
  file = {/Users/Nasy/Zotero/storage/BZ3JWA39/Kim et al. - 2024 - TSpred a robust prediction framework for TCR–epitope interactions using paired chain TCR sequence d.pdf;/Users/Nasy/Zotero/storage/A5BPECPG/7721043.html}
}

@unpublished{kingmaAdamMethodStochastic2017,
  title = {Adam: {{A Method}} for {{Stochastic Optimization}}},
  shorttitle = {Adam},
  author = {Kingma, Diederik P. and Ba, Jimmy},
  date = {2017-01-29},
  eprint = {1412.6980},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/1412.6980},
  urldate = {2022-05-11},
  abstract = {We introduce Adam, an algorithm for first-order gradient-based optimization of stochastic objective functions, based on adaptive estimates of lower-order moments. The method is straightforward to implement, is computationally efficient, has little memory requirements, is invariant to diagonal rescaling of the gradients, and is well suited for problems that are large in terms of data and/or parameters. The method is also appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. The hyper-parameters have intuitive interpretations and typically require little tuning. Some connections to related algorithms, on which Adam was inspired, are discussed. We also analyze the theoretical convergence properties of the algorithm and provide a regret bound on the convergence rate that is comparable to the best known results under the online convex optimization framework. Empirical results demonstrate that Adam works well in practice and compares favorably to other stochastic optimization methods. Finally, we discuss AdaMax, a variant of Adam based on the infinity norm.},
  keywords = {Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/B398RJNT/Kingma and Ba - 2017 - Adam A Method for Stochastic Optimization.pdf;/Users/Nasy/Zotero/storage/2TSTCLTK/1412.html}
}

@unpublished{kipfSemiSupervisedClassificationGraph2017,
  title = {Semi-{{Supervised Classification}} with {{Graph Convolutional Networks}}},
  author = {Kipf, Thomas N. and Welling, Max},
  date = {2017-02-22},
  eprint = {1609.02907},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/1609.02907},
  urldate = {2022-03-11},
  abstract = {We present a scalable approach for semi-supervised learning on graph-structured data that is based on an efficient variant of convolutional neural networks which operate directly on graphs. We motivate the choice of our convolutional architecture via a localized first-order approximation of spectral graph convolutions. Our model scales linearly in the number of graph edges and learns hidden layer representations that encode both local graph structure and features of nodes. In a number of experiments on citation networks and on a knowledge graph dataset we demonstrate that our approach outperforms related methods by a significant margin.},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/NZ8LBH9G/Kipf and Welling - 2017 - Semi-Supervised Classification with Graph Convolut.pdf;/Users/Nasy/Zotero/storage/3BB3HFQN/1609.html;/Users/Nasy/Zotero/storage/BBLLA7LQ/forum.html}
}

@unpublished{klicperaPredictThenPropagate2019,
  title = {Predict Then {{Propagate}}: {{Graph Neural Networks}} Meet {{Personalized PageRank}}},
  shorttitle = {Predict Then {{Propagate}}},
  author = {Klicpera, Johannes and Bojchevski, Aleksandar and Günnemann, Stephan},
  date = {2019-02-27},
  eprint = {1810.05997},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/1810.05997},
  urldate = {2022-03-11},
  abstract = {Neural message passing algorithms for semi-supervised classification on graphs have recently achieved great success. However, for classifying a node these methods only consider nodes that are a few propagation steps away and the size of this utilized neighborhood is hard to extend. In this paper, we use the relationship between graph convolutional networks (GCN) and PageRank to derive an improved propagation scheme based on personalized PageRank. We utilize this propagation procedure to construct a simple model, personalized propagation of neural predictions (PPNP), and its fast approximation, APPNP. Our model's training time is on par or faster and its number of parameters on par or lower than previous models. It leverages a large, adjustable neighborhood for classification and can be easily combined with any neural network. We show that this model outperforms several recently proposed methods for semi-supervised classification in the most thorough study done so far for GCN-like models. Our implementation is available online.},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/8R4EI9JT/Klicpera et al. - 2019 - Predict then Propagate Graph Neural Networks meet.pdf;/Users/Nasy/Zotero/storage/MN6NYZPC/1810.html}
}

@online{kojimaLargeLanguageModels2023,
  title = {Large {{Language Models}} Are {{Zero-Shot Reasoners}}},
  author = {Kojima, Takeshi and Gu, Shixiang Shane and Reid, Machel and Matsuo, Yutaka and Iwasawa, Yusuke},
  date = {2023-01-29},
  eprint = {2205.11916},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/2205.11916},
  abstract = {Pretrained large language models (LLMs) are widely used in many sub-fields of natural language processing (NLP) and generally known as excellent few-shot learners with task-specific exemplars. Notably, chain of thought (CoT) prompting, a recent technique for eliciting complex multi-step reasoning through step-by-step answer examples, achieved the state-of-the-art performances in arithmetics and symbolic reasoning, difficult system-2 tasks that do not follow the standard scaling laws for LLMs. While these successes are often attributed to LLMs' ability for few-shot learning, we show that LLMs are decent zero-shot reasoners by simply adding "Let's think step by step" before each answer. Experimental results demonstrate that our Zero-shot-CoT, using the same single prompt template, significantly outperforms zero-shot LLM performances on diverse benchmark reasoning tasks including arithmetics (MultiArith, GSM8K, AQUA-RAT, SVAMP), symbolic reasoning (Last Letter, Coin Flip), and other logical reasoning tasks (Date Understanding, Tracking Shuffled Objects), without any hand-crafted few-shot examples, e.g. increasing the accuracy on MultiArith from 17.7\% to 78.7\% and GSM8K from 10.4\% to 40.7\% with large InstructGPT model (text-davinci-002), as well as similar magnitudes of improvements with another off-the-shelf large model, 540B parameter PaLM. The versatility of this single prompt across very diverse reasoning tasks hints at untapped and understudied fundamental zero-shot capabilities of LLMs, suggesting high-level, multi-task broad cognitive capabilities may be extracted by simple prompting. We hope our work not only serves as the minimal strongest zero-shot baseline for the challenging reasoning benchmarks, but also highlights the importance of carefully exploring and analyzing the enormous zero-shot knowledge hidden inside LLMs before crafting finetuning datasets or few-shot exemplars.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/G4SZH4CT/Kojima et al. - 2023 - Large Language Models are Zero-Shot Reasoners.pdf;/Users/Nasy/Zotero/storage/S6DBEJ2T/2205.html}
}

@article{korpelaEPICTRACEPredictingTCR2023,
  title = {{{EPIC-TRACE}}: Predicting {{TCR}} Binding to Unseen Epitopes Using Attention and Contextualized Embeddings},
  shorttitle = {{{EPIC-TRACE}}},
  author = {Korpela, Dani and Jokinen, Emmi and Dumitrescu, Alexandru and Huuhtanen, Jani and Mustjoki, Satu and Lähdesmäki, Harri},
  date = {2023-12-01},
  journaltitle = {Bioinformatics},
  volume = {39},
  number = {12},
  pages = {btad743},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btad743},
  url = {https://doi.org/10.1093/bioinformatics/btad743},
  urldate = {2024-10-07},
  abstract = {T cells play an essential role in adaptive immune system to fight pathogens and cancer but may also give rise to autoimmune diseases. The recognition of a peptide–MHC (pMHC) complex by a T cell receptor (TCR) is required to elicit an immune response. Many machine learning models have been developed to predict the binding, but generalizing predictions to pMHCs outside the training data remains challenging.We have developed a new machine learning model that utilizes information about the TCR from both α and β chains, epitope sequence, and MHC. Our method uses ProtBERT embeddings for the amino acid sequences of both chains and the epitope, as well as convolution and multi-head attention architectures. We show the importance of each input feature as well as the benefit of including epitopes with only a few TCRs to the training data. We evaluate our model on existing databases and show that it compares favorably against other state-of-the-art models.https://github.com/DaniTheOrange/EPIC-TRACE.},
  file = {/Users/Nasy/Zotero/storage/IVQS8VNL/Korpela et al. - 2023 - EPIC-TRACE predicting TCR binding to unseen epitopes using attention and contextualized embeddings.pdf;/Users/Nasy/Zotero/storage/4REPINFP/7467056.html}
}

@article{krioukovHyperbolicGeometryComplex2010,
  title = {Hyperbolic {{Geometry}} of {{Complex Networks}}},
  author = {Krioukov, Dmitri and Papadopoulos, Fragkiskos and Kitsak, Maksim and Vahdat, Amin and Boguna, Marian},
  date = {2010-09-09},
  journaltitle = {Phys. Rev. E},
  volume = {82},
  number = {3},
  eprint = {1006.5169},
  eprinttype = {arXiv},
  eprintclass = {cond-mat, physics:physics},
  pages = {036106},
  issn = {1539-3755, 1550-2376},
  doi = {10.1103/PhysRevE.82.036106},
  url = {http://arxiv.org/abs/1006.5169},
  urldate = {2022-12-24},
  abstract = {We develop a geometric framework to study the structure and function of complex networks. We assume that hyperbolic geometry underlies these networks, and we show that with this assumption, heterogeneous degree distributions and strong clustering in complex networks emerge naturally as simple reflections of the negative curvature and metric property of the underlying hyperbolic geometry. Conversely, we show that if a network has some metric structure, and if the network degree distribution is heterogeneous, then the network has an effective hyperbolic geometry underneath. We then establish a mapping between our geometric framework and statistical mechanics of complex networks. This mapping interprets edges in a network as non-interacting fermions whose energies are hyperbolic distances between nodes, while the auxiliary fields coupled to edges are linear functions of these energies or distances. The geometric network ensemble subsumes the standard configuration model and classical random graphs as two limiting cases with degenerate geometric structures. Finally, we show that targeted transport processes without global topology knowledge, made possible by our geometric framework, are maximally efficient, according to all efficiency measures, in networks with strongest heterogeneity and clustering, and that this efficiency is remarkably robust with respect to even catastrophic disturbances and damages to the network structure.},
  keywords = {Computer Science - Networking and Internet Architecture,Condensed Matter - Disordered Systems and Neural Networks,Condensed Matter - Statistical Mechanics,Physics - Physics and Society},
  annotation = {468 citations (Crossref) [2022-12-24]},
  file = {/Users/Nasy/Zotero/storage/I7VLCZKB/Krioukov et al. - 2010 - Hyperbolic Geometry of Complex Networks.pdf;/Users/Nasy/Zotero/storage/RLSRN9UH/1006.html}
}

@online{lamGraphCastLearningSkillful2022,
  title = {{{GraphCast}}: {{Learning}} Skillful Medium-Range Global Weather Forecasting},
  shorttitle = {{{GraphCast}}},
  author = {Lam, Remi and Sanchez-Gonzalez, Alvaro and Willson, Matthew and Wirnsberger, Peter and Fortunato, Meire and Pritzel, Alexander and Ravuri, Suman and Ewalds, Timo and Alet, Ferran and Eaton-Rosen, Zach and Hu, Weihua and Merose, Alexander and Hoyer, Stephan and Holland, George and Stott, Jacklynn and Vinyals, Oriol and Mohamed, Shakir and Battaglia, Peter},
  date = {2022-12-24},
  eprint = {2212.12794},
  eprinttype = {arXiv},
  eprintclass = {physics},
  doi = {10.48550/arXiv.2212.12794},
  url = {http://arxiv.org/abs/2212.12794},
  urldate = {2023-01-27},
  abstract = {We introduce a machine-learning (ML)-based weather simulator--called "GraphCast"--which outperforms the most accurate deterministic operational medium-range weather forecasting system in the world, as well as all previous ML baselines. GraphCast is an autoregressive model, based on graph neural networks and a novel high-resolution multi-scale mesh representation, which we trained on historical weather data from the European Centre for Medium-Range Weather Forecasts (ECMWF)'s ERA5 reanalysis archive. It can make 10-day forecasts, at 6-hour time intervals, of five surface variables and six atmospheric variables, each at 37 vertical pressure levels, on a 0.25-degree latitude-longitude grid, which corresponds to roughly 25 x 25 kilometer resolution at the equator. Our results show GraphCast is more accurate than ECMWF's deterministic operational forecasting system, HRES, on 90.0\% of the 2760 variable and lead time combinations we evaluated. GraphCast also outperforms the most accurate previous ML-based weather forecasting model on 99.2\% of the 252 targets it reported. GraphCast can generate a 10-day forecast (35 gigabytes of data) in under 60 seconds on Cloud TPU v4 hardware. Unlike traditional forecasting methods, ML-based forecasting scales well with data: by training on bigger, higher quality, and more recent data, the skill of the forecasts can improve. Together these results represent a key step forward in complementing and improving weather modeling with ML, open new opportunities for fast, accurate forecasting, and help realize the promise of ML-based simulation in the physical sciences.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Physics - Atmospheric and Oceanic Physics},
  file = {/Users/Nasy/Zotero/storage/HP9ABALA/Lam et al. - 2022 - GraphCast Learning skillful medium-range global w.pdf;/Users/Nasy/Zotero/storage/PEHCADL6/2212.html}
}

@inproceedings{leDistributedRepresentationsSentences2014,
  title = {Distributed {{Representations}} of {{Sentences}} and {{Documents}}},
  booktitle = {Proceedings of the 31st {{International Conference}} on {{Machine Learning}}},
  author = {Le, Quoc and Mikolov, Tomas},
  date = {2014-06-18},
  pages = {1188--1196},
  publisher = {PMLR},
  issn = {1938-7228},
  url = {https://proceedings.mlr.press/v32/le14.html},
  urldate = {2022-06-07},
  abstract = {Many machine learning algorithms require the  input to be represented as a fixed length feature  vector. When it comes to texts, one of the most  common representations is bag-of-words. Despite their popularity, bag-of-words models have  two major weaknesses: they lose the ordering  of the words and they also ignore semantics of  the words. For example, "powerful," "strong"  and "Paris" are equally distant. In this paper,  we propose an unsupervised algorithm that learns  vector representations of sentences and text documents. This algorithm represents each document by a dense vector which is trained to predict  words in the document. Its construction gives our  algorithm the potential to overcome the weaknesses of bag-of-words models. Empirical results show that our technique outperforms bag-of-words models as well as other techniques for  text representations. Finally, we achieve new  state-of-the-art results on several text classification and sentiment analysis tasks.},
  eventtitle = {International {{Conference}} on {{Machine Learning}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/I4GHUUCI/Le and Mikolov - 2014 - Distributed Representations of Sentences and Docum.pdf}
}

@article{leemDecipheringLanguageAntibodies2022,
  title = {Deciphering the Language of Antibodies Using Self-Supervised Learning},
  author = {Leem, Jinwoo and Mitchell, Laura S. and Farmery, James H. R. and Barton, Justin and Galson, Jacob D.},
  date = {2022-05-18},
  journaltitle = {Patterns},
  pages = {100513},
  issn = {2666-3899},
  doi = {10.1016/j.patter.2022.100513},
  url = {https://www.sciencedirect.com/science/article/pii/S2666389922001052},
  abstract = {An individual’s B cell receptor (BCR) repertoire encodes information about past immune responses and potential for future disease protection. Deciphering the information stored in BCR sequence datasets will transform our understanding of disease and enable discovery of novel diagnostics and antibody therapeutics. A key challenge of BCR sequence analysis is the prediction of BCR properties from their amino acid sequence alone. Here, we present an antibody-specific language model, Antibody-specific Bidirectional Encoder Representation from Transformers (AntiBERTa), which provides a contextualized representation of BCR sequences. Following pre-training, we show that AntiBERTa embeddings capture biologically relevant information, generalizable to a range of applications. As a case study, we fine-tune AntiBERTa to predict paratope positions from an antibody sequence, outperforming public tools across multiple metrics. To our knowledge, AntiBERTa is the deepest protein-family-specific language model, providing a rich representation of BCRs. AntiBERTa embeddings are primed for multiple downstream tasks and can improve our understanding of the language of antibodies.},
  langid = {english},
  keywords = {antibodies,B cell receptors,language models,paratope prediction,representation learning,self-supervised learning,transfer learning,transformers},
  annotation = {1 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/WUX42Y2U/Leem et al. - 2022 - Deciphering the language of antibodies using self-.pdf;/Users/Nasy/Zotero/storage/4SKKXLWS/S2666389922001052.html}
}

@article{leemDecipheringLanguageAntibodies2022a,
  title = {Deciphering the Language of Antibodies Using Self-Supervised Learning},
  author = {Leem, Jinwoo and Mitchell, Laura S. and Farmery, James H. R. and Barton, Justin and Galson, Jacob D.},
  date = {2022-05-17},
  journaltitle = {PATTER},
  volume = {0},
  number = {0},
  publisher = {Elsevier},
  issn = {2666-3899},
  doi = {10.1016/j.patter.2022.100513},
  url = {https://www.cell.com/patterns/abstract/S2666-3899(22)00105-2},
  langid = {english},
  keywords = {and tested for one domain/problem,antibodies,B cell receptors,DSML2: Proof-of-concept Data science output has been formulated,implemented,language models,paratope prediction,representation learning,self-supervised learning,transfer learning,transformers},
  annotation = {1 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/JLU5YYQW/Leem et al. - 2022 - Deciphering the language of antibodies using self-.pdf;/Users/Nasy/Zotero/storage/W7NQVTWV/S2666-3899(22)00105-2.html}
}

@article{leePredictingCrossReactivityAntigen2020,
  title = {Predicting {{Cross-Reactivity}} and {{Antigen Specificity}} of {{T Cell Receptors}}},
  author = {Lee, Chloe H. and Salio, Mariolina and Napolitani, Giorgio and Ogg, Graham and Simmons, Alison and Koohy, Hashem},
  date = {2020},
  journaltitle = {Frontiers in Immunology},
  volume = {11},
  issn = {1664-3224},
  doi = {10.3389/fimmu.2020.565096},
  url = {https://www.frontiersin.org/articles/10.3389/fimmu.2020.565096},
  urldate = {2022-08-19},
  abstract = {Adaptive immune recognition is mediated by specific interactions between heterodimeric T cell receptors (TCRs) and their cognate peptide-MHC (pMHC) ligands, and the methods to accurately predict TCR:pMHC interaction would have profound clinical, therapeutic and pharmaceutical applications. Herein, we review recent developments in predicting cross-reactivity and antigen specificity of TCR recognition. We discuss current experimental and computational approaches to investigate cross-reactivity and antigen-specificity of TCRs and highlight how integrating kinetic, biophysical and structural features may offer valuable insights in modeling immunogenicity. We further underscore the close inter-relationship of these two interconnected notions and the need to investigate each in the light of the other for a better understanding of T cell responsiveness for the effective clinical applications.},
  file = {/Users/Nasy/Zotero/storage/A9KMKSN6/Lee et al. - 2020 - Predicting Cross-Reactivity and Antigen Specificit.pdf}
}

@online{liAdvanceMakingLanguage2022,
  title = {On the {{Advance}} of {{Making Language Models Better Reasoners}}},
  author = {Li, Yifei and Lin, Zeqi and Zhang, Shizhuo and Fu, Qiang and Chen, Bei and Lou, Jian-Guang and Chen, Weizhu},
  date = {2022-06-07},
  eprint = {2206.02336},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2206.02336},
  url = {http://arxiv.org/abs/2206.02336},
  abstract = {Large language models such as GPT-3 and PaLM have shown remarkable performance in few-shot learning. However, they still struggle with reasoning tasks such as the arithmetic benchmark GSM8K. Recent advances deliberately guide the language model to generate a chain of reasoning steps before producing the final answer, successfully boosting the GSM8K benchmark from 17.9\% to 58.1\% in terms of problem solving rate. In this paper, we propose a new approach, DiVeRSe (Diverse Verifier on Reasoning Step), to further advance their reasoning capability. DiVeRSe first explores different prompts to enhance the diversity in reasoning paths. Second, DiVeRSe introduces a verifier to distinguish good answers from bad answers for a better weighted voting. Finally, DiVeRSe verifies the correctness of each single step rather than all the steps in a whole. We conduct extensive experiments using the latest language model code-davinci-002 and demonstrate that DiVeRSe can achieve new state-of-the-art performance on six out of eight reasoning benchmarks (e.g., GSM8K 74.4\% to 83.2\%), outperforming the PaLM model with 540B parameters.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language},
  file = {/Users/Nasy/Zotero/storage/DSVIBI53/Li et al. - 2022 - On the Advance of Making Language Models Better Re.pdf;/Users/Nasy/Zotero/storage/UVT3742E/2206.html}
}

@inproceedings{liaoHeterogeneousGraphModel2014,
  title = {A {{Heterogeneous Graph Model}} for {{Social Opinion Detection}}},
  booktitle = {Machine {{Learning}} and {{Cybernetics}}},
  author = {Liao, Xiangwen and Huang, Yichao and Wei, Jingjing and Yu, Zhiyong and Chen, Guolong},
  editor = {Wang, Xizhao and Pedrycz, Witold and Chan, Patrick and He, Qiang},
  date = {2014},
  series = {Communications in {{Computer}} and {{Information Science}}},
  pages = {175--185},
  publisher = {Springer},
  location = {Berlin, Heidelberg},
  doi = {10.1007/978-3-662-45652-1_19},
  abstract = {Microblogging services, such as Twitter, have become popular for people to share their opinions towards a broad range of topics. It is a great challenge to get an overview of some important topics by reading all tweets every day. Previous researches such as opinion detection and opinion summarization have been studied for this problem. However, these works mainly focus on the content of text without taking the quality of short text and features of social media into consideration. In this paper, we propose a heterogeneous graph model for users’ opinion detection on microblog. We first extract keywords of topics. Then, a three-level microblog graph is constructed by combining user influence, word importance, post significance, and topic periodicity. Microblog posts are ranked from different topics by using the random walk algorithm. Experimental results on real a dataset validate the effectiveness of our approach. In comparison with baseline approaches, the proposed method achieves 8~\% improvement.},
  isbn = {978-3-662-45652-1},
  langid = {english},
  keywords = {Microblog graph,Opinion detection,Random walk,Social media},
  annotation = {0 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/6U97WDGC/Liao et al. - 2014 - A Heterogeneous Graph Model for Social Opinion Det.pdf}
}

@unpublished{liDeeperInsightsGraph2018,
  title = {Deeper {{Insights}} into {{Graph Convolutional Networks}} for {{Semi-Supervised Learning}}},
  author = {Li, Qimai and Han, Zhichao and Wu, Xiao-Ming},
  date = {2018-01-22},
  eprint = {1801.07606},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/1801.07606},
  urldate = {2022-03-11},
  abstract = {Many interesting problems in machine learning are being revisited with new deep learning tools. For graph-based semisupervised learning, a recent important development is graph convolutional networks (GCNs), which nicely integrate local vertex features and graph topology in the convolutional layers. Although the GCN model compares favorably with other state-of-the-art methods, its mechanisms are not clear and it still requires a considerable amount of labeled data for validation and model selection. In this paper, we develop deeper insights into the GCN model and address its fundamental limits. First, we show that the graph convolution of the GCN model is actually a special form of Laplacian smoothing, which is the key reason why GCNs work, but it also brings potential concerns of over-smoothing with many convolutional layers. Second, to overcome the limits of the GCN model with shallow architectures, we propose both co-training and self-training approaches to train GCNs. Our approaches significantly improve GCNs in learning with very few labels, and exempt them from requiring additional labels for validation. Extensive experiments on benchmarks have verified our theory and proposals.},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/M8FLU4QV/Li et al. - 2018 - Deeper Insights into Graph Convolutional Networks .pdf;/Users/Nasy/Zotero/storage/AZVJUWPH/1801.html}
}

@unpublished{liDeepGCNsCanGCNs2019,
  title = {{{DeepGCNs}}: {{Can GCNs Go}} as {{Deep}} as {{CNNs}}?},
  shorttitle = {{{DeepGCNs}}},
  author = {Li, Guohao and Müller, Matthias and Thabet, Ali and Ghanem, Bernard},
  date = {2019-08-19},
  eprint = {1904.03751},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/1904.03751},
  urldate = {2022-03-11},
  abstract = {Convolutional Neural Networks (CNNs) achieve impressive performance in a wide variety of fields. Their success benefited from a massive boost when very deep CNN models were able to be reliably trained. Despite their merits, CNNs fail to properly address problems with non-Euclidean data. To overcome this challenge, Graph Convolutional Networks (GCNs) build graphs to represent non-Euclidean data, borrow concepts from CNNs, and apply them in training. GCNs show promising results, but they are usually limited to very shallow models due to the vanishing gradient problem. As a result, most state-of-the-art GCN models are no deeper than 3 or 4 layers. In this work, we present new ways to successfully train very deep GCNs. We do this by borrowing concepts from CNNs, specifically residual/dense connections and dilated convolutions, and adapting them to GCN architectures. Extensive experiments show the positive effect of these deep GCN frameworks. Finally, we use these new concepts to build a very deep 56-layer GCN, and show how it significantly boosts performance (+3.7\% mIoU over state-of-the-art) in the task of point cloud semantic segmentation. We believe that the community can greatly benefit from this work, as it opens up many opportunities for advancing GCN-based research.},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/BCNSM4JV/Li et al. - 2019 - DeepGCNs Can GCNs Go as Deep as CNNs.pdf;/Users/Nasy/Zotero/storage/QJEHW9NB/1904.html}
}

@article{liLearningKnowledgeGraph2021,
  title = {Learning {{Knowledge Graph Embedding With Heterogeneous Relation Attention Networks}}},
  author = {Li, Zhifei and Liu, Hai and Zhang, Zhaoli and Liu, Tingting and Xiong, Neal N.},
  date = {2021},
  journaltitle = {IEEE Transactions on Neural Networks and Learning Systems},
  pages = {1--13},
  issn = {2162-2388},
  doi = {10.1109/TNNLS.2021.3055147},
  abstract = {Knowledge graph (KG) embedding aims to study the embedding representation to retain the inherent structure of KGs. Graph neural networks (GNNs), as an effective graph representation technique, have shown impressive performance in learning graph embedding. However, KGs have an intrinsic property of heterogeneity, which contains various types of entities and relations. How to address complex graph data and aggregate multiple types of semantic information simultaneously is a critical issue. In this article, a novel heterogeneous GNNs framework based on attention mechanism is proposed. Specifically, the neighbor features of an entity are first aggregated under each relation-path. Then the importance of different relation-paths is learned through the relation features. Finally, each relation-path-based features with the learned weight values are aggregated to generate the embedding representation. Thus, the proposed method not only aggregates entity features from different semantic aspects but also allocates appropriate weights to them. This method can capture various types of semantic information and selectively aggregate informative features. The experiment results on three real-world KGs demonstrate superior performance when compared with several state-of-the-art methods.},
  eventtitle = {{{IEEE Transactions}} on {{Neural Networks}} and {{Learning Systems}}},
  keywords = {Aggregates,Computer architecture,Fuses,Graph heterogeneity,Graph neural networks,graph neural networks (GNNs),KGs,knowledge graph (KG) embedding,Learning systems,link prediction.,Semantics,Task analysis},
  annotation = {29 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/CCI7KDUG/authors.html}
}

@article{linetteNeoantigenVaccinesPass2017,
  title = {Neoantigen {{Vaccines Pass}} the {{Immunogenicity Test}}},
  author = {Linette, Gerald P. and Carreno, Beatriz M.},
  date = {2017-10-01},
  journaltitle = {Trends in Molecular Medicine},
  volume = {23},
  number = {10},
  eprint = {28867556},
  eprinttype = {pmid},
  pages = {869--871},
  publisher = {Elsevier},
  issn = {1471-4914, 1471-499X},
  doi = {10.1016/j.molmed.2017.08.007},
  url = {https://www.cell.com/trends/molecular-medicine/abstract/S1471-4914(17)30147-8},
  urldate = {2022-05-12},
  langid = {english},
  annotation = {34 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/TX3ST6VL/Linette and Carreno - 2017 - Neoantigen Vaccines Pass the Immunogenicity Test.pdf;/Users/Nasy/Zotero/storage/Y8XAJRY9/S1471-4914(17)30147-8.html}
}

@online{linLanguageModelsProtein2022,
  title = {Language Models of Protein Sequences at the Scale of Evolution Enable Accurate Structure Prediction},
  author = {Lin, Zeming and Akin, Halil and Rao, Roshan and Hie, Brian and Zhu, Zhongkai and Lu, Wenting and Costa, Allan dos Santos and Fazel-Zarandi, Maryam and Sercu, Tom and Candido, Sal and Rives, Alexander},
  date = {2022-07-21},
  eprinttype = {bioRxiv},
  eprintclass = {New Results},
  pages = {2022.07.20.500902},
  doi = {10.1101/2022.07.20.500902},
  url = {https://www.biorxiv.org/content/10.1101/2022.07.20.500902v1},
  urldate = {2022-08-21},
  abstract = {Large language models have recently been shown to develop emergent capabilities with scale, going beyond simple pattern matching to perform higher level reasoning and generate lifelike images and text. While language models trained on protein sequences have been studied at a smaller scale, little is known about what they learn about biology as they are scaled up. In this work we train models up to 15 billion parameters, the largest language models of proteins to be evaluated to date. We find that as models are scaled they learn information enabling the prediction of the three-dimensional structure of a protein at the resolution of individual atoms. We present ESMFold for high accuracy end-to-end atomic level structure prediction directly from the individual sequence of a protein. ESMFold has similar accuracy to AlphaFold2 and RoseTTAFold for sequences with low perplexity that are well understood by the language model. ESMFold inference is an order of magnitude faster than AlphaFold2, enabling exploration of the structural space of metagenomic proteins in practical timescales.},
  langid = {english},
  pubstate = {prepublished},
  annotation = {1 citations (Crossref) [2022-08-20]},
  file = {/Users/Nasy/Zotero/storage/9KIHZ2LI/Lin et al. - 2022 - Language models of protein sequences at the scale .pdf;/Users/Nasy/Zotero/storage/5DFHC28N/2022.07.20.html}
}

@article{liSwFLOWLargescaleDistributed2021,
  title = {{{swFLOW}}: {{A}} Large-Scale Distributed Framework for Deep Learning on {{Sunway TaihuLight}} Supercomputer},
  shorttitle = {{{swFLOW}}},
  author = {Li, Mingfan and Lin, Han and Chen, Junshi and Diaz, Jose Monsalve and Xiao, Qian and Lin, Rongfen and Wang, Fei and Gao, Guang R. and An, Hong},
  date = {2021-09-01},
  journaltitle = {Information Sciences},
  volume = {570},
  pages = {831--847},
  issn = {0020-0255},
  doi = {10.1016/j.ins.2020.12.079},
  url = {https://www.sciencedirect.com/science/article/pii/S0020025520312457},
  urldate = {2022-03-13},
  abstract = {Deep learning technology is widely used in many modern fields and a number of models and software frameworks have been proposed. However, it is still very difficult to process deep learning tasks efficiently on traditional high performance computing (HPC) systems. In this paper, we propose swFLOW: a large-scale distributed framework for deep learning on Sunway TaihuLight. Based on the performance analysis results of convolutional neural network (CNN), we optimize the convolutional layer, and get 10.42× speedup compared to the original version. As for distributed training, we use elastic averaging stochastic gradient descent (EASGD) algorithm to reduce communication. On 512 processes, we get a parallel efficiency of 81.01\% with communication period τ=8. Particularly, a decentralized implementation of distributed swFLOW system is presented to alleviate bottleneck of the central server. By using distributed swFLOW system, we can scale the batch size up to 4096 among 1024 concurrent processes for cancerous region detection algorithm. The successful application on swFLOW reveals the great opportunity for joint combination of deep learning and HPC system.},
  langid = {english},
  keywords = {Cancerous region detection,Convolutional neural networks,Deep learning,High performance computing},
  annotation = {3 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/X87FMKBK/S0020025520312457.html}
}

@inproceedings{liuHeterogeneousGraphNeural2018,
  title = {Heterogeneous {{Graph Neural Networks}} for {{Malicious Account Detection}}},
  booktitle = {Proceedings of the 27th {{ACM International Conference}} on {{Information}} and {{Knowledge Management}}},
  author = {Liu, Ziqi and Chen, Chaochao and Yang, Xinxing and Zhou, Jun and Li, Xiaolong and Song, Le},
  date = {2018-10-17},
  series = {{{CIKM}} '18},
  pages = {2077--2085},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3269206.3272010},
  url = {https://doi.org/10.1145/3269206.3272010},
  urldate = {2022-05-25},
  abstract = {We present, GEM, the first heterogeneous graph neural network approach for detecting malicious accounts at Alipay, one of the world's leading mobile cashless payment platform. Our approach, inspired from a connected subgraph approach, adaptively learns discriminative embeddings from heterogeneous account-device graphs based on two fundamental weaknesses of attackers, i.e. device aggregation and activity aggregation. For the heterogeneous graph consists of various types of nodes, we propose an attention mechanism to learn the importance of different types of nodes, while using the sum operator for modeling the aggregation patterns of nodes in each type. Experiments show that our approaches consistently perform promising results compared with competitive methods over time.},
  isbn = {978-1-4503-6014-2},
  keywords = {heterogeneous graphs,malicious account detection,neural graph networks},
  annotation = {70 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/URP4V3JA/Liu et al. - 2018 - Heterogeneous Graph Neural Networks for Malicious .pdf}
}

@article{liuHighlyDivergentTcell2013,
  title = {Highly Divergent {{T-cell}} Receptor Binding Modes Underlie Specific Recognition of a Bulged Viral Peptide Bound to a Human Leukocyte Antigen Class {{I}} Molecule},
  author = {Liu, Yu Chih and Miles, John J. and Neller, Michelle A. and Gostick, Emma and Price, David A. and Purcell, Anthony W. and McCluskey, James and Burrows, Scott R. and Rossjohn, Jamie and Gras, Stephanie},
  date = {2013-05-31},
  journaltitle = {J Biol Chem},
  volume = {288},
  number = {22},
  eprint = {23569211},
  eprinttype = {pmid},
  pages = {15442--15454},
  issn = {1083-351X},
  doi = {10.1074/jbc.M112.447185},
  abstract = {Human leukocyte antigen (HLA)-I molecules can present long peptides, yet the mechanisms by which T-cell receptors (TCRs) recognize featured pHLA-I landscapes are unclear. We compared the binding modes of three distinct human TCRs, CA5, SB27, and SB47, complexed with a "super-bulged" viral peptide (LPEPLPQGQLTAY) restricted by HLA-B*35:08. The CA5 and SB27 TCRs engaged HLA-B*35:08(LPEP) similarly, straddling the central region of the peptide but making limited contacts with HLA-B*35:08. Remarkably, the CA5 TCR did not contact the α1-helix of HLA-B*35:08. Differences in the CDR3β loop between the CA5 and SB27 TCRs caused altered fine specificities. Surprisingly, the SB47 TCR engaged HLA-B*35:08(LPEP) using a completely distinct binding mechanism, namely "bypassing" the bulged peptide and making extensive contacts with the extreme N-terminal end of HLA-B*35:08. This docking footprint included HLA-I residues not observed previously as TCR contact sites. The three TCRs exhibited differing patterns of alloreactivity toward closely related or distinct HLA-I allotypes. Thus, the human T-cell repertoire comprises a range of TCRs that can interact with "bulged" pHLA-I epitopes using unpredictable strategies, including the adoption of atypical footprints on the MHC-I.},
  langid = {english},
  pmcid = {PMC3668706},
  keywords = {CD8-Positive T-Lymphocytes,Complementarity Determining Regions,Herpesvirus 4 Human,HLA-B35 Antigen,Major Histocompatibility Complex (MHC),Peptides,Protein Structure Secondary,Protein Structure Tertiary,Receptors Antigen T-Cell,Structural Biology,T-cell Receptor,Viral Immunology,Viral Proteins,X-ray Crystallography},
  annotation = {32 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/B2UX4UVV/Liu et al. - 2013 - Highly divergent T-cell receptor binding modes und.pdf}
}

@online{liuHyperbolicGraphNeural2019,
  title = {Hyperbolic {{Graph Neural Networks}}},
  author = {Liu, Qi and Nickel, Maximilian and Kiela, Douwe},
  date = {2019-10-28},
  eprint = {1910.12892},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1910.12892},
  url = {http://arxiv.org/abs/1910.12892},
  urldate = {2023-02-02},
  abstract = {Learning from graph-structured data is an important task in machine learning and artificial intelligence, for which Graph Neural Networks (GNNs) have shown great promise. Motivated by recent advances in geometric representation learning, we propose a novel GNN architecture for learning representations on Riemannian manifolds with differentiable exponential and logarithmic maps. We develop a scalable algorithm for modeling the structural properties of graphs, comparing Euclidean and hyperbolic geometry. In our experiments, we show that hyperbolic GNNs can lead to substantial improvements on various benchmark datasets.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/5RWH3V2F/Liu et al. - 2019 - Hyperbolic Graph Neural Networks.pdf;/Users/Nasy/Zotero/storage/P27FLPLG/1910.html}
}

@online{liuHyperbolicGraphNeural2019a,
  title = {Hyperbolic {{Graph Neural Networks}}},
  author = {Liu, Qi and Nickel, Maximilian and Kiela, Douwe},
  date = {2019-10-28},
  eprint = {1910.12892},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1910.12892},
  url = {http://arxiv.org/abs/1910.12892},
  urldate = {2023-02-02},
  abstract = {Learning from graph-structured data is an important task in machine learning and artificial intelligence, for which Graph Neural Networks (GNNs) have shown great promise. Motivated by recent advances in geometric representation learning, we propose a novel GNN architecture for learning representations on Riemannian manifolds with differentiable exponential and logarithmic maps. We develop a scalable algorithm for modeling the structural properties of graphs, comparing Euclidean and hyperbolic geometry. In our experiments, we show that hyperbolic GNNs can lead to substantial improvements on various benchmark datasets.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/FZ3U72YB/Liu et al. - 2019 - Hyperbolic Graph Neural Networks.pdf;/Users/Nasy/Zotero/storage/7KRUQP7Y/1910.html}
}

@inproceedings{liuMGDVDRealtimeFramework2021,
  title = {{{MG-DVD}}: {{A Real-time Framework}} for {{Malware Variant Detection Based}} on {{Dynamic Heterogeneous Graph Learning}}},
  shorttitle = {{{MG-DVD}}},
  booktitle = {Proceedings of the {{Thirtieth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Liu, Chen and Li, Bo and Zhao, Jun and Su, Ming and Liu, Xu-Dong},
  date = {2021-08},
  pages = {1512--1519},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Montreal, Canada},
  doi = {10.24963/ijcai.2021/209},
  url = {https://www.ijcai.org/proceedings/2021/209},
  urldate = {2022-05-26},
  abstract = {Detecting the newly emerging malware variants in real time is crucial for mitigating cyber risks and proactively blocking intrusions. In this paper, we propose MG-DVD, a novel detection framework based on dynamic heterogeneous graph learning, to detect malware variants in real time. Particularly, MG-DVD first models the fine-grained execution event streams of malware variants into dynamic heterogeneous graphs and investigates real-world meta-graphs between malware objects, which can effectively characterize more discriminative malicious evolutionary patterns between malware and their variants. Then, MG-DVD presents two dynamic walk-based heterogeneous graph learning methods to learn more comprehensive representations of malware variants, which significantly reduces the cost of the entire graph retraining. As a result, MG-DVD is equipped with the ability to detect malware variants in real time, and it presents better interpretability by introducing meaningful meta-graphs. Comprehensive experiments on large-scale samples prove that our proposed MG-DVD outperforms state-of-the-art methods in detecting malware variants in terms of effectiveness and efficiency.},
  eventtitle = {Thirtieth {{International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-21}}\vphantom\{\}},
  isbn = {978-0-9992411-9-6},
  langid = {english},
  annotation = {0 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/HPJ2QGXE/Liu et al. - 2021 - MG-DVD A Real-time Framework for Malware Variant .pdf}
}

@book{liuParallelizingConvolutionalNeural2015,
  title = {Parallelizing {{Convolutional Neural Networks}} on {{Intel}}\textasciicircum\{\textbackslash textregistered \} {{Many Integrated Core Architecture}}},
  author = {Liu, Junjie and Wang, Haixia and Wang, Dongsheng and Gao, Yuan and Li, Zuofeng},
  date = {2015-03-24},
  pages = {82},
  doi = {10.1007/978-3-319-16086-3_6},
  abstract = {Convolutional neural networks (CNNs) are state-of-the-art machine learning algorithm in low-resolution vision tasks and are widely applied in many applications. However, the training process of them is very time-consuming. As a result, many approaches have been proposed in which parallelization is one of the most effective. In this article, we parallelized a classic CNN on a new platform of Intel\textbackslash (\textasciicircum\{\{\textbackslash textregistered \}\}\textbackslash ) Xeon Phi\textbackslash (\textasciicircum\{\{\{\textbackslash text \{TM\}\}\}\}\textbackslash ) Coprocessor with OpenMP. Our implementation acquired 131\textbackslash (\textbackslash times \textbackslash ) speedup against the serial version running on the coprocessor itself and 8.3\textbackslash (\textbackslash times \textbackslash ) speedup against the serial baseline on the Xeon\textbackslash (\textasciicircum\{\{\textbackslash textregistered \}\}\textbackslash ) E5-2697 CPU.},
  pagetotal = {71}
}

@online{liuRoBERTaRobustlyOptimized2019,
  title = {{{RoBERTa}}: {{A Robustly Optimized BERT Pretraining Approach}}},
  shorttitle = {{{RoBERTa}}},
  author = {Liu, Yinhan and Ott, Myle and Goyal, Naman and Du, Jingfei and Joshi, Mandar and Chen, Danqi and Levy, Omer and Lewis, Mike and Zettlemoyer, Luke and Stoyanov, Veselin},
  date = {2019-07-26},
  eprint = {1907.11692},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.1907.11692},
  url = {http://arxiv.org/abs/1907.11692},
  urldate = {2022-06-24},
  abstract = {Language model pretraining has led to significant performance gains but careful comparison between different approaches is challenging. Training is computationally expensive, often done on private datasets of different sizes, and, as we will show, hyperparameter choices have significant impact on the final results. We present a replication study of BERT pretraining (Devlin et al., 2019) that carefully measures the impact of many key hyperparameters and training data size. We find that BERT was significantly undertrained, and can match or exceed the performance of every model published after it. Our best model achieves state-of-the-art results on GLUE, RACE and SQuAD. These results highlight the importance of previously overlooked design choices, and raise questions about the source of recently reported improvements. We release our models and code.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computation and Language},
  file = {/Users/Nasy/Zotero/storage/NN4IJ7J9/Liu et al. - 2019 - RoBERTa A Robustly Optimized BERT Pretraining App.pdf;/Users/Nasy/Zotero/storage/W7AHBLUH/1907.html}
}

@online{liuVisualInstructionTuning2023,
  title = {Visual {{Instruction Tuning}}},
  author = {Liu, Haotian and Li, Chunyuan and Wu, Qingyang and Lee, Yong Jae},
  date = {2023-04-17},
  eprint = {2304.08485},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2304.08485},
  url = {http://arxiv.org/abs/2304.08485},
  urldate = {2023-07-16},
  abstract = {Instruction tuning large language models (LLMs) using machine-generated instruction-following data has improved zero-shot capabilities on new tasks, but the idea is less explored in the multimodal field. In this paper, we present the first attempt to use language-only GPT-4 to generate multimodal language-image instruction-following data. By instruction tuning on such generated data, we introduce LLaVA: Large Language and Vision Assistant, an end-to-end trained large multimodal model that connects a vision encoder and LLM for general-purpose visual and language understanding.Our early experiments show that LLaVA demonstrates impressive multimodel chat abilities, sometimes exhibiting the behaviors of multimodal GPT-4 on unseen images/instructions, and yields a 85.1\% relative score compared with GPT-4 on a synthetic multimodal instruction-following dataset. When fine-tuned on Science QA, the synergy of LLaVA and GPT-4 achieves a new state-of-the-art accuracy of 92.53\%. We make GPT-4 generated visual instruction tuning data, our model and code base publicly available.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/CL445547/Liu et al. - 2023 - Visual Instruction Tuning.pdf;/Users/Nasy/Zotero/storage/BRSQ35S9/2304.html}
}

@article{longFullyConvolutionalNetworks,
  title = {Fully {{Convolutional Networks}} for {{Semantic Segmentation}}},
  author = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
  pages = {10},
  abstract = {Convolutional networks are powerful visual models that yield hierarchies of features. We show that convolutional networks by themselves, trained end-to-end, pixelsto-pixels, exceed the state-of-the-art in semantic segmentation. Our key insight is to build “fully convolutional” networks that take input of arbitrary size and produce correspondingly-sized output with efficient inference and learning. We define and detail the space of fully convolutional networks, explain their application to spatially dense prediction tasks, and draw connections to prior models. We adapt contemporary classification networks (AlexNet [20], the VGG net [31], and GoogLeNet [32]) into fully convolutional networks and transfer their learned representations by fine-tuning [3] to the segmentation task. We then define a skip architecture that combines semantic information from a deep, coarse layer with appearance information from a shallow, fine layer to produce accurate and detailed segmentations. Our fully convolutional network achieves stateof-the-art segmentation of PASCAL VOC (20\% relative improvement to 62.2\% mean IU on 2012), NYUDv2, and SIFT Flow, while inference takes less than one fifth of a second for a typical image.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/IKNBMYAP/Long et al. - Fully Convolutional Networks for Semantic Segmenta.pdf}
}

@inproceedings{longFullyConvolutionalNetworks2015,
  title = {Fully Convolutional Networks for Semantic Segmentation},
  author = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
  date = {2015-06-01},
  pages = {3431--3440},
  publisher = {IEEE Computer Society},
  issn = {1063-6919},
  doi = {10.1109/CVPR.2015.7298965},
  url = {https://www.computer.org/csdl/proceedings-article/cvpr/2015/07298965/12OmNy49sME},
  urldate = {2022-06-07},
  abstract = {Convolutional networks are powerful visual models that yield hierarchies of features. We show that convolutional networks by themselves, trained end-to-end, pixels-to-pixels, exceed the state-of-the-art in semantic segmentation. Our key insight is to build “fully convolutional” networks that take input of arbitrary size and produce correspondingly-sized output with efficient inference and learning. We define and detail the space of fully convolutional networks, explain their application to spatially dense prediction tasks, and draw connections to prior models. We adapt contemporary classification networks (AlexNet [20], the VGG net [31], and GoogLeNet [32]) into fully convolutional networks and transfer their learned representations by fine-tuning [3] to the segmentation task. We then define a skip architecture that combines semantic information from a deep, coarse layer with appearance information from a shallow, fine layer to produce accurate and detailed segmentations. Our fully convolutional network achieves state-of-the-art segmentation of PASCAL VOC (20\% relative improvement to 62.2\% mean IU on 2012), NYUDv2, and SIFT Flow, while inference takes less than one fifth of a second for a typical image.},
  eventtitle = {2015 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
  isbn = {978-1-4673-6964-0},
  langid = {english},
  annotation = {12516 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/REUZNPVU/Long et al. - 2015 - Fully convolutional networks for semantic segmenta.pdf;/Users/Nasy/Zotero/storage/KZB8RDQI/12OmNy49sME.html}
}

@inproceedings{longHGKGNNHeterogeneousGraph2021,
  title = {{{HGK-GNN}}: {{Heterogeneous Graph Kernel}} Based {{Graph Neural Networks}}},
  shorttitle = {{{HGK-GNN}}},
  booktitle = {Proceedings of the 27th {{ACM SIGKDD Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Long, Qingqing and Xu, Lingjun and Fang, Zheng and Song, Guojie},
  date = {2021-08-14},
  series = {{{KDD}} '21},
  pages = {1129--1138},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3447548.3467429},
  url = {https://doi.org/10.1145/3447548.3467429},
  urldate = {2022-05-25},
  abstract = {While Graph Neural Networks (GNNs) have achieved remarkable results in a variety of applications, recent studies exposed important shortcomings in their ability to capture heterogeneous structures and attributes of an underlying graph. Furthermore, though many Heterogeneous GNN (HGNN) variants have been proposed and have achieved state-of-the-art results, there are limited theoretical understandings of their properties. To this end, we introduce graph kernel to HGNNs and develop a Heterogeneous Graph Kernel-based Graph Neural Networks (HGK-GNN). Specifically, we incorporate the Mahalanobis distance (MD) to build a Heterogeneous Graph Kernel (HGK), and incorporating it into deep neural architectures, thus leveraging a heterogeneous GNN with a heterogeneous aggregation scheme. Also, we mathematically bridge HGK-GNN to metapath-based HGNNs, which are the most popular and effective variants of HGNNs. We theoretically analyze HGK-GNN with the indispensable Encoder and Aggregator component in metapath-based HGNNs, through which we provide a theoretical perspective to understand the most popular HGNNs. To the best of our knowledge, we are the first to introduce HGK into the field of HGNNs, and mark a first step in the direction of theoretically understanding and analyzing HGNNs. Correspondingly, both graph and node classification experiments are leveraged to evaluate HGK-GNN, where HGK-GNN outperforms a wide range of baselines on six real-world datasets, endorsing the analysis.},
  isbn = {978-1-4503-8332-5},
  keywords = {graph kernels,heterogeneous graph convolutional network,heterogeneous networks,nosource},
  annotation = {1 citations (Crossref) [2022-08-03]}
}

@online{loshchilovSGDRStochasticGradient2017,
  title = {{{SGDR}}: {{Stochastic Gradient Descent}} with {{Warm Restarts}}},
  shorttitle = {{{SGDR}}},
  author = {Loshchilov, Ilya and Hutter, Frank},
  date = {2017-05-03},
  eprint = {1608.03983},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.1608.03983},
  url = {http://arxiv.org/abs/1608.03983},
  urldate = {2024-11-08},
  abstract = {Restart techniques are common in gradient-free optimization to deal with multimodal functions. Partial warm restarts are also gaining popularity in gradient-based optimization to improve the rate of convergence in accelerated gradient schemes to deal with ill-conditioned functions. In this paper, we propose a simple warm restart technique for stochastic gradient descent to improve its anytime performance when training deep neural networks. We empirically study its performance on the CIFAR-10 and CIFAR-100 datasets, where we demonstrate new state-of-the-art results at 3.14\% and 16.21\%, respectively. We also demonstrate its advantages on a dataset of EEG recordings and on a downsampled version of the ImageNet dataset. Our source code is available at https://github.com/loshchil/SGDR},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Mathematics - Optimization and Control},
  file = {/Users/Nasy/Zotero/storage/8BVGRD52/Loshchilov and Hutter - 2017 - SGDR Stochastic Gradient Descent with Warm Restarts.pdf;/Users/Nasy/Zotero/storage/35VKYUL9/1608.html}
}

@article{luDeepLearningbasedPrediction2021,
  title = {Deep Learning-Based Prediction of the {{T}} Cell Receptor–Antigen Binding Specificity},
  author = {Lu, Tianshi and Zhang, Ze and Zhu, James and Wang, Yunguan and Jiang, Peixin and Xiao, Xue and Bernatchez, Chantale and Heymach, John V. and Gibbons, Don L. and Wang, Jun and Xu, Lin and Reuben, Alexandre and Wang, Tao},
  date = {2021-10},
  journaltitle = {Nat Mach Intell},
  volume = {3},
  number = {10},
  pages = {864--875},
  publisher = {Nature Publishing Group},
  issn = {2522-5839},
  doi = {10.1038/s42256-021-00383-2},
  url = {https://www.nature.com/articles/s42256-021-00383-2},
  abstract = {Neoantigens play a key role in the recognition of tumour cells by T cells; however, only a small proportion of neoantigens truly elicit T-cell responses, and few clues exist as to which neoantigens are recognized by which T-cell receptors (TCRs). We built a transfer learning-based model named the pMHC–TCR binding prediction network (pMTnet) to predict TCR binding specificities of the neoantigens—and T cell antigens in general—presented by class I major histocompatibility complexes. pMTnet was comprehensively validated by a series of analyses and exhibited great advances over previous works. By applying pMTnet to human tumour genomics data, we discovered that neoantigens were generally more immunogenic than self-antigens, but human endogenous retrovirus E (a special type of self-antigen that is reactivated in kidney cancer) is more immunogenic than neoantigens. We further discovered that patients with more clonally expanded T cells that exhibit better affinity against truncal rather than subclonal neoantigens had more favourable prognosis and treatment response to immunotherapy in melanoma and lung cancer but not in kidney cancer. Predicting TCR–neoantigen/antigen pairing is one of the most daunting challenges in modern immunology; however, we achieved an accurate prediction of the pairing using only the TCR sequence (CDR3β), antigen sequence and class I major histocompatibility complex allele, and our work revealed unique insights into the interactions between TCRs and major histocompatibility complexes in human tumours, using pMTnet as a discovery tool.},
  issue = {10},
  langid = {english},
  keywords = {Adaptive immunity,Antigen processing and presentation,Computational models,Data mining,Machine learning},
  annotation = {9 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/9CH6DVID/Lu et al. - 2021 - Deep learning-based prediction of the T cell recep.pdf;/Users/Nasy/Zotero/storage/LT4BZKJ9/s42256-021-00383-2.html}
}

@article{luDeepXDEDeepLearning2021,
  title = {{{DeepXDE}}: {{A Deep Learning Library}} for {{Solving Differential Equations}}},
  shorttitle = {{{DeepXDE}}},
  author = {Lu, Lu and Meng, Xuhui and Mao, Zhiping and Karniadakis, George Em},
  date = {2021-01},
  journaltitle = {SIAM Rev.},
  volume = {63},
  number = {1},
  pages = {208--228},
  publisher = {{Society for Industrial and Applied Mathematics}},
  issn = {0036-1445},
  doi = {10.1137/19M1274067},
  url = {https://epubs.siam.org/doi/10.1137/19M1274067},
  urldate = {2022-10-06},
  abstract = {Deep learning has achieved remarkable success in diverse applications; however, its use in solving partial differential equations (PDEs) has emerged only recently. Here, we present an overview of physics-informed neural networks (PINNs), which embed a PDE into the loss of the neural network using automatic differentiation. The PINN algorithm is simple, and it can be applied to different types of PDEs, including integro-differential equations, fractional PDEs, and stochastic PDEs. Moreover, from an implementation point of view, PINNs solve inverse problems as easily as forward problems. We propose a new residual-based adaptive refinement (RAR) method to improve the training efficiency of PINNs. For pedagogical reasons, we compare the PINN algorithm to a standard finite element method. We also present a Python library for PINNs, DeepXDE, which is designed to serve both as an educational tool to be used in the classroom as well as a research tool for solving problems in computational science and engineering. Specifically, DeepXDE can solve forward problems given initial and boundary conditions, as well as inverse problems given some extra measurements. DeepXDE supports complex-geometry domains based on the technique of constructive solid geometry and enables the user code to be compact, resembling closely the mathematical formulation. We introduce the usage of DeepXDE and its customizability, and we also demonstrate the capability of PINNs and the user-friendliness of DeepXDE for five different examples. More broadly, DeepXDE contributes to the more rapid development of the emerging scientific machine learning field.},
  keywords = {65-01,65-04,65L99,65M99,65N99,deep learning,DeepXDE,differential equations,education software,physics-informed neural networks,scientific machine learning},
  annotation = {179 citations (Crossref) [2022-10-06]},
  file = {/Users/Nasy/Zotero/storage/8GALEYKJ/Lu et al. - 2021 - DeepXDE A Deep Learning Library for Solving Diffe.pdf}
}

@article{luDeepXDEDeepLearning2021a,
  title = {{{DeepXDE}}: {{A Deep Learning Library}} for {{Solving Differential Equations}}},
  shorttitle = {{{DeepXDE}}},
  author = {Lu, Lu and Meng, Xuhui and Mao, Zhiping and Karniadakis, George Em},
  date = {2021-01},
  journaltitle = {SIAM Rev.},
  volume = {63},
  number = {1},
  pages = {208--228},
  publisher = {{Society for Industrial and Applied Mathematics}},
  issn = {0036-1445},
  doi = {10.1137/19M1274067},
  url = {https://epubs.siam.org/doi/10.1137/19M1274067},
  abstract = {Deep learning has achieved remarkable success in diverse applications; however, its use in solving partial differential equations (PDEs) has emerged only recently. Here, we present an overview of physics-informed neural networks (PINNs), which embed a PDE into the loss of the neural network using automatic differentiation. The PINN algorithm is simple, and it can be applied to different types of PDEs, including integro-differential equations, fractional PDEs, and stochastic PDEs. Moreover, from an implementation point of view, PINNs solve inverse problems as easily as forward problems. We propose a new residual-based adaptive refinement (RAR) method to improve the training efficiency of PINNs. For pedagogical reasons, we compare the PINN algorithm to a standard finite element method. We also present a Python library for PINNs, DeepXDE, which is designed to serve both as an educational tool to be used in the classroom as well as a research tool for solving problems in computational science and engineering. Specifically, DeepXDE can solve forward problems given initial and boundary conditions, as well as inverse problems given some extra measurements. DeepXDE supports complex-geometry domains based on the technique of constructive solid geometry and enables the user code to be compact, resembling closely the mathematical formulation. We introduce the usage of DeepXDE and its customizability, and we also demonstrate the capability of PINNs and the user-friendliness of DeepXDE for five different examples. More broadly, DeepXDE contributes to the more rapid development of the emerging scientific machine learning field.},
  keywords = {65-01,65-04,65L99,65M99,65N99,deep learning,DeepXDE,differential equations,education software,physics-informed neural networks,scientific machine learning},
  annotation = {198 citations (Crossref) [2022-11-04]},
  file = {/Users/Nasy/Zotero/storage/D35754NX/Lu et al. - 2021 - DeepXDE A Deep Learning Library for Solving Diffe.pdf}
}

@inproceedings{luFantasticallyOrderedPrompts2022,
  title = {Fantastically {{Ordered Prompts}} and {{Where}} to {{Find Them}}: {{Overcoming Few-Shot Prompt Order Sensitivity}}},
  shorttitle = {Fantastically {{Ordered Prompts}} and {{Where}} to {{Find Them}}},
  booktitle = {Proceedings of the 60th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}} ({{Volume}} 1: {{Long Papers}})},
  author = {Lu, Yao and Bartolo, Max and Moore, Alastair and Riedel, Sebastian and Stenetorp, Pontus},
  date = {2022-05},
  pages = {8086--8098},
  publisher = {Association for Computational Linguistics},
  location = {Dublin, Ireland},
  doi = {10.18653/v1/2022.acl-long.556},
  url = {https://aclanthology.org/2022.acl-long.556},
  abstract = {When primed with only a handful of training samples, very large, pretrained language models such as GPT-3 have shown competitive results when compared to fully-supervised, fine-tuned, large, pretrained language models. We demonstrate that the order in which the samples are provided can make the difference between near state-of-the-art and random guess performance: essentially some permutations are “fantastic” and some not. We analyse this phenomenon in detail, establishing that: it is present across model sizes (even for the largest current models), it is not related to a specific subset of samples, and that a given good permutation for one model is not transferable to another. While one could use a development set to determine which permutations are performant, this would deviate from the true few-shot setting as it requires additional annotated data. Instead, we use the generative nature of language models to construct an artificial development set and based on entropy statistics of the candidate permutations on this set, we identify performant prompts. Our method yields a 13\% relative improvement for GPT-family models across eleven different established text classification tasks.},
  eventtitle = {{{ACL}} 2022},
  annotation = {9 citations (Crossref) [2023-04-09]},
  file = {/Users/Nasy/Zotero/storage/2ZFXRGE5/Lu et al. - 2022 - Fantastically Ordered Prompts and Where to Find Th.pdf}
}

@article{maduraTcellReceptorSpecificity2013,
  title = {T-Cell {{Receptor Specificity Maintained}} by {{Altered Thermodynamics}}},
  author = {Madura, Florian and Rizkallah, Pierre J. and Miles, Kim M. and Holland, Christopher J. and Bulek, Anna M. and Fuller, Anna and Schauenburg, Andrea J. A. and Miles, John J. and Liddy, Nathaniel and Sami, Malkit and Li, Yi and Hossain, Moushumi and Baker, Brian M. and Jakobsen, Bent K. and Sewell, Andrew K. and Cole, David K.},
  date = {2013-06-28},
  journaltitle = {J Biol Chem},
  volume = {288},
  number = {26},
  eprint = {23698002},
  eprinttype = {pmid},
  pages = {18766--18775},
  issn = {0021-9258},
  doi = {10.1074/jbc.M113.464560},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3696650/},
  urldate = {2022-03-19},
  abstract = {Background: The molecular principles governing T-cell specificity are poorly understood., Results: High affinity binding of a melanoma-specific T-cell receptor (TCR) is mediated through new MHC contacts and distinct thermodynamics., Conclusion: A novel thermodynamic mechanism upholds TCR-peptide specificity., Significance: TCRs can maintain peptide specificity using a mechanism that may enable widespread, safe enhancement of TCR binding affinity in therapeutic applications., The T-cell receptor (TCR) recognizes peptides bound to major histocompatibility molecules (MHC) and allows T-cells to interrogate the cellular proteome for internal anomalies from the cell surface. The TCR contacts both MHC and peptide in an interaction characterized by weak affinity (KD = 100 nm to 270 μm). We used phage-display to produce a melanoma-specific TCR (α24β17) with a 30,000-fold enhanced binding affinity (KD = 0.6 nm) to aid our exploration of the molecular mechanisms utilized to maintain peptide specificity. Remarkably, although the enhanced affinity was mediated primarily through new TCR-MHC contacts, α24β17 remained acutely sensitive to modifications at every position along the peptide backbone, mimicking the specificity of the wild type TCR. Thermodynamic analyses revealed an important role for solvation in directing peptide specificity. These findings advance our understanding of the molecular mechanisms that can govern the exquisite peptide specificity characteristic of TCR recognition.},
  pmcid = {PMC3696650},
  annotation = {29 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/5GQ6NFU2/Madura et al. - 2013 - T-cell Receptor Specificity Maintained by Altered .pdf}
}

@article{maEnsembleDeepConvolutional2022,
  title = {Ensemble of {{Deep Convolutional Neural Networks}} for Real-Time Gravitational Wave Signal Recognition},
  author = {Ma, CunLiang and Wang, Wei and Wang, He and Cao, Zhoujian},
  date = {2022-04-25},
  journaltitle = {Phys. Rev. D},
  volume = {105},
  number = {8},
  eprint = {2204.12058},
  eprinttype = {arXiv},
  pages = {083013},
  issn = {2470-0010, 2470-0029},
  doi = {10.1103/PhysRevD.105.083013},
  url = {http://arxiv.org/abs/2204.12058},
  urldate = {2022-04-27},
  abstract = {With the rapid development of deep learning technology, more and more researchers apply it to gravitational wave (GW) data analysis. Previous studies focused on a single deep learning model. In this paper we design an ensemble algorithm combining a set of convolutional neural networks (CNN) for GW signal recognition. The whole ensemble model consists of two sub-ensemble models. Each sub-ensemble model is also an ensemble model of deep learning. The two sub-ensemble models treat data of Hanford and Livinston detectors respectively. Proper voting scheme is adopted to combine the two sub-ensemble models to form the whole ensemble model. We apply this ensemble model to all reported GW events in the first observation and second observation runs (O1/O2) by LIGO-VIRGO Scientific Collaboration. We find that the ensemble algorithm can clearly identify all binary black hole merger events except GW170818. We also apply the ensemble model to one month (August 2017) data of O2. There is no false trigger happens although only O1 data are used for training. Our test results indicate that the ensemble learning algorithms can be used in real-time GW data analysis.},
  issue = {8},
  keywords = {Astrophysics - Instrumentation and Methods for Astrophysics},
  annotation = {0 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/6N3IE2QN/Ma et al. - 2022 - Ensemble of Deep Convolutional Neural Networks for.pdf;/Users/Nasy/Zotero/storage/I2PZWKKE/2204.html}
}

@article{maHighthroughputHighdimensionalSinglecell2021,
  title = {High-Throughput and High-Dimensional Single-Cell Analysis of Antigen-Specific {{CD8}}+ {{T}} Cells},
  author = {Ma, Ke-Yue and Schonnesen, Alexandra A. and He, Chenfeng and Xia, Amanda Y. and Sun, Eric and Chen, Eunise and Sebastian, Katherine R. and Guo, Yu-Wan and Balderas, Robert and Kulkarni-Date, Mrinalini and Jiang, Ning},
  date = {2021-12},
  journaltitle = {Nat Immunol},
  volume = {22},
  number = {12},
  pages = {1590--1598},
  publisher = {Nature Publishing Group},
  issn = {1529-2916},
  doi = {10.1038/s41590-021-01073-2},
  url = {https://www.nature.com/articles/s41590-021-01073-2},
  urldate = {2023-07-19},
  abstract = {Although critical to T cell function, antigen specificity is often omitted in high-throughput multiomics-based T cell profiling due to technical challenges. We describe a high-dimensional, tetramer-associated T cell antigen receptor (TCR) sequencing (TetTCR-SeqHD) method to simultaneously profile cognate antigen specificities, TCR sequences, targeted gene expression and surface-protein expression from tens of thousands of single cells. Using human polyclonal CD8+ T cells with known antigen specificity and TCR sequences, we demonstrate over 98\% precision for detecting the correct antigen specificity. We also evaluate gene expression and phenotypic differences among antigen-specific CD8+ T cells and characterize phenotype signatures of influenza- and Epstein–Barr virus-specific CD8+ T cells that are unique to their pathogen targets. Moreover, with the high-throughput capacity of profiling hundreds of antigens simultaneously, we apply TetTCR-SeqHD to identify antigens that preferentially enrich cognate CD8+ T cells in patients with type 1 diabetes compared to healthy controls and discover a TCR that cross-reacts with diabetes-related and microbiome antigens. TetTCR-SeqHD is a powerful approach for profiling T cell responses in humans and mice.},
  issue = {12},
  langid = {english},
  keywords = {RNA sequencing,T-cell receptor},
  annotation = {26 citations (Crossref) [2023-07-18]},
  file = {/Users/Nasy/Zotero/storage/7IELDCW9/Ma et al. - 2021 - High-throughput and high-dimensional single-cell a.pdf}
}

@book{martinManifoldTheoryIntroduction2002,
  title = {Manifold {{Theory}}: {{An Introduction}} for {{Mathematical Physicists}}},
  shorttitle = {Manifold {{Theory}}},
  author = {Martin, D.},
  date = {2002-03-01},
  eprint = {BpijAgAAQBAJ},
  eprinttype = {googlebooks},
  publisher = {Elsevier},
  abstract = {This account of basic manifold theory and global analysis, based on senior undergraduate and post-graduate courses at Glasgow University for students and researchers in theoretical physics, has been proven over many years. The treatment is rigorous yet less condensed than in books written primarily for pure mathematicians. Prerequisites include knowledge of basic linear algebra and topology. Topology is included in two appendices because many courses on mathematics for physics students do not include this subject.Provides a comprehensive account of basic manifold theory for post-graduate studentsIntroduces the basic theory of differential geometry to students in theoretical physics and mathematicsContains more than 130 exercises, with helpful hints and solutions},
  isbn = {978-0-85709-963-1},
  langid = {english},
  pagetotal = {424},
  keywords = {Mathematics / Geometry / General}
}

@inproceedings{mathieuContinuousHierarchicalRepresentations2019,
  title = {Continuous {{Hierarchical Representations}} with {{Poincaré Variational Auto-Encoders}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Mathieu, Emile and Le Lan, Charline and Maddison, Chris J. and Tomioka, Ryota and Teh, Yee Whye},
  date = {2019},
  volume = {32},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2019/hash/0ec04cb3912c4f08874dd03716f80df1-Abstract.html},
  abstract = {The Variational Auto-Encoder (VAE)  is a popular method for learning a generative model and embeddings of the data. Many real datasets are hierarchically structured. However, traditional VAEs map data in a Euclidean latent space which cannot efficiently embed tree-like structures. Hyperbolic spaces with negative curvature can. We therefore endow VAEs with a Poincaré ball model of hyperbolic geometry as a latent space and rigorously derive the necessary methods to work with two main Gaussian generalisations on that space. We empirically show better generalisation to unseen data than the Euclidean counterpart, and can qualitatively and quantitatively better recover hierarchical structures.},
  keywords = {⛔ No DOI found},
  file = {/Users/Nasy/Zotero/storage/FY6TG6MF/Mathieu et al. - 2019 - Continuous Hierarchical Representations with Poinc.pdf}
}

@unpublished{mcisaacUsingMachineLearning2022,
  title = {Using Machine Learning to Auto-Tune Chi-Squared Tests for Gravitational Wave Searches},
  author = {McIsaac, Connor and Harry, Ian},
  date = {2022-03-07},
  eprint = {2203.03449},
  eprinttype = {arXiv},
  eprintclass = {astro-ph, physics:gr-qc},
  url = {http://arxiv.org/abs/2203.03449},
  urldate = {2022-03-08},
  abstract = {The sensitivity of gravitational wave searches is reduced by the presence of non-Gaussian noise in the detector data. These non-Gaussianities often match well with the template waveforms used in matched filter searches, and require signal-consistency tests to distinguish them from astrophysical signals. However, empirically tuning these tests for maximum efficacy is time consuming and limits the complexity of these tests. In this work we demonstrate a framework to use machine-learning techniques to automatically tune signal-consistency tests. We implement a new \$\textbackslash chi\textasciicircum 2\$ signal-consistency test targeting the large population of noise found in searches for intermediate mass black hole binaries, training the new test using the framework set out in this paper. We find that this method effectively trains a complex model to down-weight the noise, while leaving the signal population relatively unaffected. This improves the sensitivity of the search by \$\textbackslash sim 11\textbackslash\%\$ for signals with masses \${$>$} 300 M\_\textbackslash odot\$. In the future this framework could be used to implement new tests in any of the commonly used matched-filter search algorithms, further improving the sensitivity of our searches.},
  keywords = {Astrophysics - High Energy Astrophysical Phenomena,Astrophysics - Instrumentation and Methods for Astrophysics,General Relativity and Quantum Cosmology},
  file = {/Users/Nasy/Zotero/storage/KDSN6CDS/McIsaac and Harry - 2022 - Using machine learning to auto-tune chi-squared te.pdf;/Users/Nasy/Zotero/storage/VVKEYDAR/2203.html}
}

@article{meherDeepLearningAstronomy2021,
  title = {Deep Learning in Astronomy: A Tutorial Perspective},
  shorttitle = {Deep Learning in Astronomy},
  author = {Meher, Saroj K. and Panda, Ganapati},
  date = {2021-09-01},
  journaltitle = {Eur. Phys. J. Spec. Top.},
  volume = {230},
  number = {10},
  pages = {2285--2317},
  issn = {1951-6401},
  doi = {10.1140/epjs/s11734-021-00207-9},
  url = {https://doi.org/10.1140/epjs/s11734-021-00207-9},
  urldate = {2022-07-29},
  abstract = {Astronomy is a branch of science that covers the study and analysis of all extraterrestrial objects and their phenomena. The study brings together the aspects of mathematics, physics, and chemistry to elucidate the origin, evolution, and functions of the Universe and the celestial bodies contained within it. It is a highly complex and challenging task to study and analyze astrophysical phenomena in real-life scenarios by human beings. We need some systems and approaches as our helping hands, that should work instinctively as per the requirements, such as machines. However, machine demands multifaceted information-rich data sets and their processes to work as per the requirement. This aspiration is eventually fulfilled with the advancement of technology, and machine-based analysis of the plethora of available data sets changed our understanding of the Universe and astronomical discoveries. It led to a modern exploring environment that helps to understand observational astronomy (OA) better. The advanced approach of OA has become virtual day by day, which requires a panchromatic approach to the Universe for uncovering more precise and comprehensive physical images. These aspects make us realize that astronomical phenomena’ investigation is mostly data-dependent and necessitates a thorough understanding of complex data analysis and visualization methodologies. For improved analysis of the information obtained from the ancient stages of astronomical research—particularly the observed phenomena; mostly mathematical and statistical techniques were being used. Eventually, modern data analysis techniques played a role in analyzing these massive data and became the essential components of the era of the virtual observatory (VO). At present, the size and analysis of these data sets with complexities are at the challenging stage for VO and the bottleneck for major scientific and technology. Such scenarios of model-driven data analysis paradigm are experiencing a valuable companion of data-driven science for extracting the hidden and novel information from the data sets. A technology-enabled solution is the need of the present scenario in sizeable astronomical data analysis. In this regard, recently, the popularity of ML models is increasing very rapidly within the astronomical domain because of their ability to provide handy solutions. The present tutorial summarizes various aspects of ML that could rightly offer appreciable information and answers to astronomical phenomena. We have also aimed to accommodate different learning aspects of ML and deep learning (DL), including selecting, extracting, and preprocessing the input information.},
  langid = {english},
  annotation = {1 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/EN9SYLC8/Meher and Panda - 2021 - Deep learning in astronomy a tutorial perspective.pdf}
}

@online{MiningHeterogeneousInformation,
  title = {Mining {{Heterogeneous Information Networks}}: {{Principles}} and {{Methodologies}}},
  shorttitle = {Mining {{Heterogeneous Information Networks}}},
  url = {https://ieeexplore.ieee.org/document/6812698?arnumber=6812698},
  urldate = {2022-05-27},
  abstract = {Real-world physical and abstract data objects are interconnected, forming gigantic, interconnected networks. By structuring these data objects and interactions between these objects into multiple types, such networks become semi-structured heterogeneous information networks. Most real-world applications that handle big data, including interconnected social media and social networks, scientific, engineering, or medical information systems, online e-commerce systems, and most database systems, can be structured into heterogeneous information networks. Therefore, effective analysis of large-scale heterogeneous information networks poses an interesting but critical challenge. In this book, we investigate the principles and methodologies of mining heterogeneous information networks. Departing from many existing network models that view interconnected data as homogeneous graphs or networks, our semi-structured heterogeneous information network model leverages the rich semantics of typed nodes and links in a network and uncovers surprisingly rich knowledge from the network. This semi-structured heterogeneous network modeling leads to a series of new principles and powerful methodologies for mining interconnected data, including: (1) rank-based clustering and classification; (2) meta-path-based similarity search and mining; (3) relation strength-aware mining, and many other potential developments. This book introduces this new research frontier and points out some promising research directions. Table of Contents: Introduction / Ranking-Based Clustering / Classification of Heterogeneous Information Networks / Meta-Path-Based Similarity Search / Meta-Path-Based Relationship Prediction / Relation Strength-Aware Clustering with Incomplete Attributes / User-Guided Clustering via Meta-Path Selection / Research Frontiers},
  langid = {american},
  file = {/Users/Nasy/Zotero/storage/46EMJV8H/6812698.html}
}

@inproceedings{minNoisyChannelLanguage2022,
  title = {Noisy {{Channel Language Model Prompting}} for {{Few-Shot Text Classification}}},
  booktitle = {Proceedings of the 60th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}} ({{Volume}} 1: {{Long Papers}})},
  author = {Min, Sewon and Lewis, Mike and Hajishirzi, Hannaneh and Zettlemoyer, Luke},
  date = {2022-05},
  pages = {5316--5330},
  publisher = {Association for Computational Linguistics},
  location = {Dublin, Ireland},
  doi = {10.18653/v1/2022.acl-long.365},
  url = {https://aclanthology.org/2022.acl-long.365},
  abstract = {We introduce a noisy channel approach for language model prompting in few-shot text classification. Instead of computing the likelihood of the label given the input (referred as direct models), channel models compute the conditional probability of the input given the label, and are thereby required to explain every word in the input. We use channel models for recently proposed few-shot learning methods with no or very limited updates to the language model parameters, via either in-context demonstration or prompt tuning. Our experiments show that, for both methods, channel models significantly outperform their direct counterparts, which we attribute to their stability, i.e., lower variance and higher worst-case accuracy. We also present extensive ablations that provide recommendations for when to use channel prompt tuning instead of other competitive models (e.g., direct head tuning): channel prompt tuning is preferred when the number of training examples is small, labels in the training data are imbalanced, or generalization to unseen labels is required.},
  eventtitle = {{{ACL}} 2022},
  annotation = {2 citations (Crossref) [2023-04-09]},
  file = {/Users/Nasy/Zotero/storage/XKBDBFW6/Min et al. - 2022 - Noisy Channel Language Model Prompting for Few-Sho.pdf}
}

@online{minRethinkingRoleDemonstrations2022,
  title = {Rethinking the {{Role}} of {{Demonstrations}}: {{What Makes In-Context Learning Work}}?},
  shorttitle = {Rethinking the {{Role}} of {{Demonstrations}}},
  author = {Min, Sewon and Lyu, Xinxi and Holtzman, Ari and Artetxe, Mikel and Lewis, Mike and Hajishirzi, Hannaneh and Zettlemoyer, Luke},
  date = {2022-10-20},
  eprint = {2202.12837},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2202.12837},
  url = {http://arxiv.org/abs/2202.12837},
  abstract = {Large language models (LMs) are able to in-context learn -- perform a new task via inference alone by conditioning on a few input-label pairs (demonstrations) and making predictions for new inputs. However, there has been little understanding of how the model learns and which aspects of the demonstrations contribute to end task performance. In this paper, we show that ground truth demonstrations are in fact not required -- randomly replacing labels in the demonstrations barely hurts performance on a range of classification and multi-choce tasks, consistently over 12 different models including GPT-3. Instead, we find that other aspects of the demonstrations are the key drivers of end task performance, including the fact that they provide a few examples of (1) the label space, (2) the distribution of the input text, and (3) the overall format of the sequence. Together, our analysis provides a new way of understanding how and why in-context learning works, while opening up new questions about how much can be learned from large language models through inference alone.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language},
  file = {/Users/Nasy/Zotero/storage/37NL9HLZ/Min et al. - 2022 - Rethinking the Role of Demonstrations What Makes .pdf;/Users/Nasy/Zotero/storage/E2QE99SB/2202.html}
}

@unpublished{mishraOptimizationModelIndependent2021,
  title = {Optimization of Model Independent Gravitational Wave Search Using Machine Learning},
  author = {Mishra, Tanmaya and O'Brien, Brendan and Gayathri, V. and Szczepanczyk, Marek and Bhaumik, Shubhagata and Bartos, Imre and Klimenko, Sergey},
  date = {2021-05-10},
  eprint = {2105.04739},
  eprinttype = {arXiv},
  eprintclass = {gr-qc},
  url = {http://arxiv.org/abs/2105.04739},
  urldate = {2021-06-04},
  abstract = {The Coherent WaveBurst (cWB) search algorithm identifies generic gravitational wave (GW) signals in the LIGO-Virgo strain data. We propose a machine learning (ML) method to optimize the pipeline sensitivity to the special class of GW signals known as binary black hole (BBH) mergers. Here, we test the ML-enhanced cWB search on strain data from the first and second observing runs of Advanced LIGO and successfully recover all BBH events previously reported by cWB, with higher significance. For simulated events found with a false alarm rate less than \$1\textbackslash,\textbackslash mathrm\{yr\}\textasciicircum\{-1\}\$, we demonstrate the improvement in the detection efficiency of 26\% for stellar-mass BBH mergers and 16\% for intermediate mass black hole binary mergers. To demonstrate the robustness of the ML-enhanced search for the detection of generic BBH signals, we show that it has the increased sensitivity to the spin precessing or eccentric BBH events, even when trained on simulated quasi-circular BBH events with aligned spins.},
  keywords = {General Relativity and Quantum Cosmology},
  file = {/Users/Nasy/Zotero/storage/LEPAQYTV/Mishra et al. - 2021 - Optimization of model independent gravitational wa.pdf;/Users/Nasy/Zotero/storage/FVEEB8A8/2105.html}
}

@article{montemurroNetTCR20EnablesAccurate2021,
  title = {{{NetTCR-2}}.0 Enables Accurate Prediction of {{TCR-peptide}} Binding by Using Paired {{TCRα}} and β Sequence Data},
  author = {Montemurro, Alessandro and Schuster, Viktoria and Povlsen, Helle Rus and Bentzen, Amalie Kai and Jurtz, Vanessa and Chronister, William D. and Crinklaw, Austin and Hadrup, Sine R. and Winther, Ole and Peters, Bjoern and Jessen, Leon Eyrich and Nielsen, Morten},
  date = {2021-09-10},
  journaltitle = {Commun Biol},
  volume = {4},
  number = {1},
  pages = {1--13},
  publisher = {Nature Publishing Group},
  issn = {2399-3642},
  doi = {10.1038/s42003-021-02610-3},
  url = {https://www.nature.com/articles/s42003-021-02610-3},
  urldate = {2022-05-14},
  abstract = {Prediction of T-cell receptor (TCR) interactions with MHC-peptide complexes remains highly challenging. This challenge is primarily due to three dominant factors: data accuracy, data scarceness, and problem complexity. Here, we showcase that “shallow” convolutional neural network (CNN) architectures are adequate to deal with the problem complexity imposed by the length variations of TCRs. We demonstrate that current public bulk CDR3β-pMHC binding data overall is of low quality and that the development of accurate prediction models is contingent on paired α/β TCR sequence data corresponding to at least 150 distinct pairs for each investigated pMHC. In comparison, models trained on CDR3α or CDR3β data alone demonstrated a variable and pMHC specific relative performance drop. Together these findings support that T-cell specificity is predictable given the availability of accurate and sufficient paired TCR sequence data. NetTCR-2.0 is publicly available at https://services.healthtech.dtu.dk/service.php?NetTCR-2.0.},
  issue = {1},
  langid = {english},
  keywords = {Computational models,Data mining},
  annotation = {12 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/8DHX9BME/Montemurro et al. - 2021 - NetTCR-2.0 enables accurate prediction of TCR-pept.pdf;/Users/Nasy/Zotero/storage/IDIY7CAF/s42003-021-02610-3.html}
}

@article{morisCurrentChallengesUnseenepitope2021,
  title = {Current Challenges for Unseen-Epitope {{TCR}} Interaction Prediction and a New Perspective Derived from Image Classification},
  author = {Moris, Pieter and De Pauw, Joey and Postovskaya, Anna and Gielis, Sofie and De Neuter, Nicolas and Bittremieux, Wout and Ogunjimi, Benson and Laukens, Kris and Meysman, Pieter},
  date = {2021-07-01},
  journaltitle = {Briefings in Bioinformatics},
  volume = {22},
  number = {4},
  pages = {bbaa318},
  issn = {1477-4054},
  doi = {10.1093/bib/bbaa318},
  url = {https://doi.org/10.1093/bib/bbaa318},
  urldate = {2023-08-11},
  abstract = {The prediction of epitope recognition by T-cell receptors (TCRs) has seen many advancements in recent years, with several methods now available that can predict recognition for a specific set of epitopes. However, the generic case of evaluating all possible TCR-epitope pairs remains challenging, mainly due to the high diversity of the interacting sequences and the limited amount of currently available training data. In this work, we provide an overview of the current state of this unsolved problem. First, we examine appropriate validation strategies to accurately assess the generalization performance of generic TCR-epitope recognition models when applied to both seen and unseen epitopes. In addition, we present a novel feature representation approach, which we call ImRex (interaction map recognition). This approach is based on the pairwise combination of physicochemical properties of the individual amino acids in the CDR3 and epitope sequences, which provides a convolutional neural network with the combined representation of both sequences. Lastly, we highlight various challenges that are specific to TCR-epitope data and that can adversely affect model performance. These include the issue of selecting negative data, the imbalanced epitope distribution of curated TCR-epitope datasets and the potential exchangeability of TCR alpha and beta chains. Our results indicate that while extrapolation to unseen epitopes remains a difficult challenge, ImRex makes this feasible for a subset of epitopes that are not too dissimilar from the training data. We show that appropriate feature engineering methods and rigorous benchmark standards are required to create and validate TCR-epitope predictive models.},
  file = {/Users/Nasy/Zotero/storage/GK6KDM4W/Moris et al. - 2021 - Current challenges for unseen-epitope TCR interaction prediction and a new perspective derived from .pdf;/Users/Nasy/Zotero/storage/5PPNMR5Z/6042663.html}
}

@article{morisCurrentChallengesUnseenepitope2021a,
  title = {Current Challenges for Unseen-Epitope {{TCR}} Interaction Prediction and a New Perspective Derived from Image Classification},
  author = {Moris, Pieter and De Pauw, Joey and Postovskaya, Anna and Gielis, Sofie and De Neuter, Nicolas and Bittremieux, Wout and Ogunjimi, Benson and Laukens, Kris and Meysman, Pieter},
  date = {2021-07-01},
  journaltitle = {Briefings in Bioinformatics},
  volume = {22},
  number = {4},
  pages = {bbaa318},
  issn = {1477-4054},
  doi = {10.1093/bib/bbaa318},
  url = {https://doi.org/10.1093/bib/bbaa318},
  urldate = {2024-10-30},
  abstract = {The prediction of epitope recognition by T-cell receptors (TCRs) has seen many advancements in recent years, with several methods now available that can predict recognition for a specific set of epitopes. However, the generic case of evaluating all possible TCR-epitope pairs remains challenging, mainly due to the high diversity of the interacting sequences and the limited amount of currently available training data. In this work, we provide an overview of the current state of this unsolved problem. First, we examine appropriate validation strategies to accurately assess the generalization performance of generic TCR-epitope recognition models when applied to both seen and unseen epitopes. In addition, we present a novel feature representation approach, which we call ImRex (interaction map recognition). This approach is based on the pairwise combination of physicochemical properties of the individual amino acids in the CDR3 and epitope sequences, which provides a convolutional neural network with the combined representation of both sequences. Lastly, we highlight various challenges that are specific to TCR-epitope data and that can adversely affect model performance. These include the issue of selecting negative data, the imbalanced epitope distribution of curated TCR-epitope datasets and the potential exchangeability of TCR alpha and beta chains. Our results indicate that while extrapolation to unseen epitopes remains a difficult challenge, ImRex makes this feasible for a subset of epitopes that are not too dissimilar from the training data. We show that appropriate feature engineering methods and rigorous benchmark standards are required to create and validate TCR-epitope predictive models.},
  file = {/Users/Nasy/Zotero/storage/Z3AGL4FK/Moris et al. - 2021 - Current challenges for unseen-epitope TCR interaction prediction and a new perspective derived from.pdf;/Users/Nasy/Zotero/storage/L53W3ZMG/6042663.html}
}

@unpublished{mouldDeepLearningBayesian2022,
  title = {Deep Learning and {{Bayesian}} Inference of Gravitational-Wave Populations: Hierarchical Black-Hole Mergers},
  shorttitle = {Deep Learning and {{Bayesian}} Inference of Gravitational-Wave Populations},
  author = {Mould, Matthew and Gerosa, Davide and Taylor, Stephen R.},
  date = {2022-03-07},
  eprint = {2203.03651},
  eprinttype = {arXiv},
  eprintclass = {astro-ph, physics:gr-qc},
  url = {http://arxiv.org/abs/2203.03651},
  urldate = {2022-03-09},
  abstract = {The catalog of gravitational-wave events is growing, and so are our hopes of constraining the underlying astrophysics of stellar-mass black-hole mergers by inferring the distributions of, e.g., masses and spins. While conventional analyses parametrize this population with simple phenomenological models, we propose an innovative physics-first approach that compares gravitational-wave data against astrophysical simulations. We combine state-of-the-art deep-learning techniques with hierarchical Bayesian inference and exploit our approach to constrain the properties of repeated black-hole mergers from the gravitational-wave events in the most recent LIGO/Virgo catalog. Deep neural networks allow us to (i) construct a flexible population model that accurately emulates simulations of hierarchical mergers, (ii) estimate selection effects, and (iii) recover the branching ratios of repeated-merger generations. Among our results we find that: the distribution of host-environment escape speeds favors values {$<$}100 km s\$\textasciicircum\{-1\}\$ but is relatively flat; first-generation black holes are born with a maximum mass that is compatible with current estimates from pair-instability supernovae; there is multimodal substructure in both the mass and spin distributions due to repeated mergers; and binaries with a higher-generation component make up at least 15\% of the underlying population. The deep-learning pipeline we present is ready to be used in conjunction with realistic astrophysical population-synthesis predictions.},
  keywords = {Astrophysics - High Energy Astrophysical Phenomena,Astrophysics - Instrumentation and Methods for Astrophysics,General Relativity and Quantum Cosmology},
  file = {/Users/Nasy/Zotero/storage/AFLLLZ3T/Mould et al. - 2022 - Deep learning and Bayesian inference of gravitatio.pdf;/Users/Nasy/Zotero/storage/HQIWC78X/2203.html}
}

@software{naAutoPaperDownload2023,
  title = {Auto {{Paper}}: {{Download}} and Save Daily Papers},
  shorttitle = {Auto {{Paper}}},
  author = {Na, Saiyang},
  date = {2023-04-04},
  doi = {10.5281/zenodo.7796951},
  url = {https://zenodo.org/record/7796951},
  urldate = {2023-04-04},
  abstract = {The first version.},
  organization = {Zenodo},
  file = {/Users/Nasy/Zotero/storage/B45N347M/7796951.html}
}

@software{naNasyxxAutoPaper2023,
  title = {Nasyxx/Auto\_paper: V0.1.0},
  shorttitle = {Nasyxx/Auto\_paper},
  author = {Na, Saiyang},
  date = {2023-04-04},
  doi = {10.5281/zenodo.7796951},
  url = {https://zenodo.org/record/7796951},
  abstract = {The first version.},
  organization = {Zenodo},
  file = {/Users/Nasy/Zotero/storage/HHH5BS8I/7796951.html}
}

@software{naNasyxxEmacs20232023,
  title = {Nasyxx/Emacs.d: 2023 03 31 Version},
  shorttitle = {Nasyxx/Emacs.d},
  author = {Na, Saiyang and {haasdo95} and Berserk},
  date = {2023-04-01},
  doi = {10.5281/zenodo.7790390},
  url = {https://zenodo.org/record/7790390},
  urldate = {2023-04-01},
  abstract = {My emacs configuration},
  organization = {Zenodo},
  file = {/Users/Nasy/Zotero/storage/RIBD3ULV/Na et al. - 2023 - nasyxxemacs.d 2023 03 31 version.pdf}
}

@software{naNasyxxEmacs20232023a,
  title = {Nasyxx/Emacs.d: 2023 03 31 Version},
  shorttitle = {Nasyxx/Emacs.d},
  author = {Na, Saiyang and {haasdo95} and Berserk},
  date = {2023-04-01},
  doi = {10.5281/zenodo.7790390},
  url = {https://zenodo.org/record/7790390},
  urldate = {2023-04-01},
  abstract = {My emacs configuration},
  organization = {Zenodo},
  file = {/Users/Nasy/Zotero/storage/IDRL2QX5/Na et al. - 2023 - nasyxxemacs.d 2023 03 31 version.pdf}
}

@software{nasyBeamerThemeSMILE2022,
  title = {Beamer {{Theme}} in {{SMILE}}},
  author = {Nasy},
  date = {2022-01-04T06:48:44Z},
  origdate = {2021-11-01T19:06:32Z},
  url = {https://github.com/uta-smile/beamer-theme},
  urldate = {2022-04-15},
  organization = {uta-smile},
  keywords = {nosource}
}

@software{nasyNasyxxAutoPaper2023,
  title = {Nasyxx/Auto\_paper: V0.1.0},
  shorttitle = {Nasyxx/Auto\_paper},
  author = {Nasy},
  date = {2023-04-04},
  doi = {10.5281/zenodo.7796951},
  url = {https://zenodo.org/record/7796951},
  urldate = {2023-04-04},
  abstract = {The first version.},
  organization = {Zenodo},
  file = {/Users/Nasy/Zotero/storage/BEB5KI9W/7796951.html}
}

@online{NeoantigensCancerImmunotherapy,
  title = {Neoantigens in Cancer Immunotherapy},
  url = {https://www.science.org/doi/10.1126/science.aaa4971},
  urldate = {2022-05-12},
  file = {/Users/Nasy/Zotero/storage/DQP6N2IW/science.html}
}

@article{newmanPowerLawsPareto2005,
  title = {Power Laws, {{Pareto}} Distributions and {{Zipf}}'s Law},
  author = {Newman, MEJ},
  date = {2005-09-01},
  journaltitle = {Contemporary Physics},
  volume = {46},
  number = {5},
  pages = {323--351},
  publisher = {Taylor \& Francis},
  issn = {0010-7514},
  doi = {10.1080/00107510500052444},
  url = {https://doi.org/10.1080/00107510500052444},
  urldate = {2023-03-10},
  abstract = {When the probability of measuring a particular value of some quantity varies inversely as a power of that value, the quantity is said to follow a power law, also known variously as Zipf's law or the Pareto distribution. Power laws appear widely in physics, biology, earth and planetary sciences, economics and finance, computer science, demography and the social sciences. For instance, the distributions of the sizes of cities, earthquakes, forest fires, solar flares, moon craters and people's personal fortunes all appear to follow power laws. The origin of power-law behaviour has been a topic of debate in the scientific community for more than a century. Here we review some of the empirical evidence for the existence of power-law forms and the theories proposed to explain them.},
  annotation = {3403 citations (Crossref) [2023-03-09]},
  file = {/Users/Nasy/Zotero/storage/GFKJ2VM2/Newman - 2005 - Power laws, Pareto distributions and Zipf's law.pdf}
}

@inproceedings{nickelLearningContinuousHierarchies2018,
  title = {Learning {{Continuous Hierarchies}} in the {{Lorentz Model}} of {{Hyperbolic Geometry}}},
  booktitle = {Proceedings of the 35th {{International Conference}} on {{Machine Learning}}},
  author = {Nickel, Maximillian and Kiela, Douwe},
  date = {2018-07-03},
  pages = {3779--3788},
  publisher = {PMLR},
  issn = {2640-3498},
  url = {https://proceedings.mlr.press/v80/nickel18a.html},
  urldate = {2022-12-23},
  abstract = {We are concerned with the discovery of hierarchical relationships from large-scale unstructured similarity scores. For this purpose, we study different models of hyperbolic space and find that learning embeddings in the Lorentz model is substantially more efficient than in the Poincar\{é\}-ball model. We show that the proposed approach allows us to learn high-quality embeddings of large taxonomies which yield improvements over Poincar\{é\} embeddings, especially in low dimensions. Lastly, we apply our model to discover hierarchies in two real-world datasets: we show that an embedding in hyperbolic space can reveal important aspects of a company’s organizational structure as well as reveal historical relationships between language families.},
  eventtitle = {International {{Conference}} on {{Machine Learning}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/6SQ695J4/Nickel and Kiela - 2018 - Learning Continuous Hierarchies in the Lorentz Mod.pdf}
}

@inproceedings{nickelPoincareEmbeddingsLearning2017,
  title = {Poincaré {{Embeddings}} for {{Learning Hierarchical Representations}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Nickel, Maximillian and Kiela, Douwe},
  date = {2017},
  volume = {30},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2017/hash/59dfa2df42d9e3d41f5b02bfc32229dd-Abstract.html},
  abstract = {Representation learning has become an invaluable approach for learning from symbolic data such as text and graphs. However, state-of-the-art embedding methods typically do not account for latent hierarchical structures which are characteristic for many complex symbolic datasets. In this work, we introduce a new approach for learning hierarchical representations of symbolic data by embedding them into hyperbolic space -- or more precisely into an n-dimensional Poincaré ball. Due to the underlying hyperbolic geometry, this allows us to learn parsimonious representations of symbolic data by simultaneously capturing hierarchy and similarity. We present an efficient algorithm to learn the embeddings based on Riemannian optimization and show experimentally that Poincaré embeddings can outperform Euclidean embeddings significantly on data with latent hierarchies, both in terms of representation capacity and in terms of generalization ability.},
  keywords = {⛔ No DOI found},
  file = {/Users/Nasy/Zotero/storage/RPWNT2AV/Nickel and Kiela - 2017 - Poincaré Embeddings for Learning Hierarchical Repr.pdf}
}

@article{nielsenNetMHCpan3ImprovedPrediction2016,
  title = {{{NetMHCpan-3}}.0; Improved Prediction of Binding to {{MHC}} Class {{I}} Molecules Integrating Information from Multiple Receptor and Peptide Length Datasets},
  author = {Nielsen, Morten and Andreatta, Massimo},
  date = {2016-03-30},
  journaltitle = {Genome Medicine},
  volume = {8},
  number = {1},
  pages = {33},
  issn = {1756-994X},
  doi = {10.1186/s13073-016-0288-x},
  url = {https://doi.org/10.1186/s13073-016-0288-x},
  urldate = {2022-05-12},
  abstract = {Binding of peptides to MHC class I molecules (MHC-I) is essential for antigen presentation to cytotoxic T-cells.},
  keywords = {9mer Data,Binding Prediction,Length Profile,Peptide Length,Predictive Performance},
  annotation = {342 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/ZEPZTX7W/Nielsen and Andreatta - 2016 - NetMHCpan-3.0\; improved prediction of binding to M.pdf;/Users/Nasy/Zotero/storage/555VYK8L/s13073-016-0288-x.html}
}

@article{nielsenNetMHCpanMethodQuantitative2007,
  title = {{{NetMHCpan}}, a {{Method}} for {{Quantitative Predictions}} of {{Peptide Binding}} to {{Any HLA-A}} and -{{B Locus Protein}} of {{Known Sequence}}},
  author = {Nielsen, Morten and Lundegaard, Claus and Blicher, Thomas and Lamberth, Kasper and Harndahl, Mikkel and Justesen, Sune and Røder, Gustav and Peters, Bjoern and Sette, Alessandro and Lund, Ole and Buus, Søren},
  date = {2007-08-29},
  journaltitle = {PLOS ONE},
  volume = {2},
  number = {8},
  pages = {e796},
  publisher = {Public Library of Science},
  issn = {1932-6203},
  doi = {10.1371/journal.pone.0000796},
  url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0000796},
  urldate = {2022-05-12},
  abstract = {BackgroundBinding of peptides to Major Histocompatibility Complex (MHC) molecules is the single most selective step in the recognition of pathogens by the cellular immune system. The human MHC class I system (HLA-I) is extremely polymorphic. The number of registered HLA-I molecules has now surpassed 1500. Characterizing the specificity of each separately would be a major undertaking.Principal FindingsHere, we have drawn on a large database of known peptide-HLA-I interactions to develop a bioinformatics method, which takes both peptide and HLA sequence information into account, and generates quantitative predictions of the affinity of any peptide-HLA-I interaction. Prospective experimental validation of peptides predicted to bind to previously untested HLA-I molecules, cross-validation, and retrospective prediction of known HIV immune epitopes and endogenous presented peptides, all successfully validate this method. We further demonstrate that the method can be applied to perform a clustering analysis of MHC specificities and suggest using this clustering to select particularly informative novel MHC molecules for future biochemical and functional analysis.ConclusionsEncompassing all HLA molecules, this high-throughput computational method lends itself to epitope searches that are not only genome- and pathogen-wide, but also HLA-wide. Thus, it offers a truly global analysis of immune responses supporting rational development of vaccines and immunotherapy. It also promises to provide new basic insights into HLA structure-function relationships. The method is available at http://www.cbs.dtu.dk/services/NetMHCpan.},
  langid = {english},
  keywords = {Artificial neural networks,Binding analysis,Database and informatics methods,HIV,Intracellular pathogens,Major histocompatibility complex,Neural networks,Phylogenetic analysis},
  annotation = {466 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/K6839SQM/Nielsen et al. - 2007 - NetMHCpan, a Method for Quantitative Predictions o.pdf;/Users/Nasy/Zotero/storage/RCGDIF6E/article.html}
}

@inproceedings{NIPS2007_d7322ed7,
  title = {Probabilistic Matrix Factorization},
  booktitle = {Advances in Neural Information Processing Systems},
  author = {Mnih, Andriy and Salakhutdinov, Russ R},
  editor = {Platt, J. and Koller, D. and Singer, Y. and Roweis, S.},
  date = {2007},
  volume = {20},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2007/file/d7322ed717dedf1eb4e6e52a37ea7bcd-Paper.pdf},
  keywords = {nosource}
}

@article{oonoAsymptoticBehaviorsGraph2019,
  title = {On {{Asymptotic Behaviors}} of {{Graph CNNs}} from {{Dynamical Systems Perspective}}},
  author = {Oono, Kenta and Suzuki, Taiji},
  date = {2019-01-01},
  url = {https://openreview.net/forum?id=mPLlSuOPxJh},
  urldate = {2022-03-11},
  abstract = {Graph Neural Networks (graph NNs) are a promising deep learning approach for analyzing graph-structured data. However, it is known that they do not improve (or sometimes worsen) their predictive...},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/VUERQMTE/forum.html}
}

@online{osheaIntroductionConvolutionalNeural2015,
  title = {An {{Introduction}} to {{Convolutional Neural Networks}}},
  author = {O'Shea, Keiron and Nash, Ryan},
  date = {2015-12-02},
  eprint = {1511.08458},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.1511.08458},
  url = {http://arxiv.org/abs/1511.08458},
  urldate = {2024-10-30},
  abstract = {The field of machine learning has taken a dramatic twist in recent times, with the rise of the Artificial Neural Network (ANN). These biologically inspired computational models are able to far exceed the performance of previous forms of artificial intelligence in common machine learning tasks. One of the most impressive forms of ANN architecture is that of the Convolutional Neural Network (CNN). CNNs are primarily used to solve difficult image-driven pattern recognition tasks and with their precise yet simple architecture, offers a simplified method of getting started with ANNs. This document provides a brief introduction to CNNs, discussing recently published papers and newly formed techniques in developing these brilliantly fantastic image recognition models. This introduction assumes you are familiar with the fundamentals of ANNs and machine learning.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing},
  file = {/Users/Nasy/Zotero/storage/QZESEMNE/O'Shea and Nash - 2015 - An Introduction to Convolutional Neural Networks.pdf;/Users/Nasy/Zotero/storage/WRPJ3B2Y/1511.html}
}

@article{pearsonIntroductionSequenceSimilarity2013,
  title = {An Introduction to Sequence Similarity ("homology") Searching},
  author = {Pearson, William R.},
  date = {2013-06},
  journaltitle = {Curr Protoc Bioinformatics},
  volume = {Chapter 3},
  eprint = {23749753},
  eprinttype = {pmid},
  pages = {Unit3.1},
  issn = {1934-340X},
  doi = {10.1002/0471250953.bi0301s42},
  abstract = {Sequence similarity searching, typically with BLAST, is the most widely used and most reliable strategy for characterizing newly determined sequences. Sequence similarity searches can identify "homologous" proteins or genes by detecting excess similarity- statistically significant similarity that reflects common ancestry. This unit provides an overview of the inference of homology from significant similarity, and introduces other units in this chapter that provide more details on effective strategies for identifying homologs.},
  langid = {english},
  pmcid = {PMC3820096},
  keywords = {Databases Protein,Proteins,Sequence Alignment,Sequence Analysis,Sequence Homology},
  annotation = {318 citations (Crossref) [2022-08-19]},
  file = {/Users/Nasy/Zotero/storage/3GCTR97R/Pearson - 2013 - An introduction to sequence similarity (homology.pdf}
}

@article{pengCharacterizingInteractionConformation2023,
  title = {Characterizing the Interaction Conformation between {{T-cell}} Receptors and Epitopes with Deep Learning},
  author = {Peng, Xingang and Lei, Yipin and Feng, Peiyuan and Jia, Lemei and Ma, Jianzhu and Zhao, Dan and Zeng, Jianyang},
  date = {2023-04},
  journaltitle = {Nat Mach Intell},
  volume = {5},
  number = {4},
  pages = {395--407},
  publisher = {Nature Publishing Group},
  issn = {2522-5839},
  doi = {10.1038/s42256-023-00634-4},
  url = {https://www.nature.com/articles/s42256-023-00634-4},
  urldate = {2023-08-14},
  abstract = {Computational modelling of the interactions between T-cell receptors (TCRs) and epitopes is of great importance for immunotherapy and antigen discovery. However, current TCR–epitope interaction prediction tools are still in a relatively primitive stage and have limited capacity in deciphering the underlying binding mechanisms, for example, characterizing the pairwise residue interactions between TCRs and epitopes. Here we designed a new deep-learning-based framework for modelling TCR–epitope interactions, called TCR–Epitope Interaction Modelling at Residue Level (TEIM-Res), which took the sequences of TCRs and epitopes as input and predicted both pairwise residue distances and contact sites involved in the interactions. To tackle the current bottleneck of data deficiency, we applied a few-shot learning strategy by incorporating sequence-level binding information into residue-level interaction prediction. The validation experiments and analyses indicated its good prediction performance and the effectiveness of its design. We demonstrated three potential applications: revealing the subtle conformation changes of mutant TCR–epitope pairs, uncovering the key contacts based on epitope-specific TCR pools, and mining the intrinsic binding rules and patterns. In summary, our model can serve as a useful tool for comprehensively characterizing TCR–epitope interactions and understanding the molecular basis of binding mechanisms.},
  issue = {4},
  langid = {english},
  keywords = {Cellular immunity,Computational biology and bioinformatics,Machine learning,Protein function predictions},
  file = {/Users/Nasy/Zotero/storage/PVQPIQU6/Peng et al. - 2023 - Characterizing the interaction conformation between T-cell receptors and epitopes with deep learning.pdf}
}

@article{pengCharacterizingInteractionConformation2023a,
  title = {Characterizing the Interaction Conformation between {{T-cell}} Receptors and Epitopes with Deep Learning},
  author = {Peng, Xingang and Lei, Yipin and Feng, Peiyuan and Jia, Lemei and Ma, Jianzhu and Zhao, Dan and Zeng, Jianyang},
  date = {2023-04},
  journaltitle = {Nat Mach Intell},
  volume = {5},
  number = {4},
  pages = {395--407},
  publisher = {Nature Publishing Group},
  issn = {2522-5839},
  doi = {10.1038/s42256-023-00634-4},
  url = {https://www.nature.com/articles/s42256-023-00634-4},
  urldate = {2024-10-30},
  abstract = {Computational modelling of the interactions between T-cell receptors (TCRs) and epitopes is of great importance for immunotherapy and antigen discovery. However, current TCR–epitope interaction prediction tools are still in a relatively primitive stage and have limited capacity in deciphering the underlying binding mechanisms, for example, characterizing the pairwise residue interactions between TCRs and epitopes. Here we designed a new deep-learning-based framework for modelling TCR–epitope interactions, called TCR–Epitope Interaction Modelling at Residue Level (TEIM-Res), which took the sequences of TCRs and epitopes as input and predicted both pairwise residue distances and contact sites involved in the interactions. To tackle the current bottleneck of data deficiency, we applied a few-shot learning strategy by incorporating sequence-level binding information into residue-level interaction prediction. The validation experiments and analyses indicated its good prediction performance and the effectiveness of its design. We demonstrated three potential applications: revealing the subtle conformation changes of mutant TCR–epitope pairs, uncovering the key contacts based on epitope-specific TCR pools, and mining the intrinsic binding rules and patterns. In summary, our model can serve as a useful tool for comprehensively characterizing TCR–epitope interactions and understanding the molecular basis of binding mechanisms.},
  langid = {english},
  keywords = {Cellular immunity,Computational biology and bioinformatics,Machine learning,Protein function predictions}
}

@inproceedings{pengFinegrainedEventCategorization2019,
  title = {Fine-Grained {{Event Categorization}} with {{Heterogeneous Graph Convolutional Networks}}},
  booktitle = {Proceedings of the {{Twenty-Eighth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Peng, Hao and Li, Jianxin and Gong, Qiran and Song, Yangqiu and Ning, Yuanxin and Lai, Kunfeng and Yu, Philip S.},
  date = {2019-08},
  pages = {3238--3245},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Macao, China},
  doi = {10.24963/ijcai.2019/449},
  url = {https://www.ijcai.org/proceedings/2019/449},
  urldate = {2022-05-26},
  abstract = {Events are happening in real-world and real-time, which can be planned and organized occasions involving multiple people and objects. Social media platforms publish a lot of text messages containing public events with comprehensive topics. However, mining social events is challenging due to the heterogeneous event elements in texts and explicit and implicit social network structures. In this paper, we design an event meta-schema to characterize the semantic relatedness of social events and build an event-based heterogeneous information network (HIN) integrating information from external knowledge base, and propose a novel Pairwise Popularity Graph Convolutional Network (PP-GCN) based fine-grained social event categorization model. We propose a Knowledgeable meta-paths Instances based social Event Similarity (KIES) between events and build a weighted adjacent matrix as input to the PP-GCN model. Comprehensive experiments on real data collections are conducted to compare various social event detection and clustering tasks. Experimental results demonstrate that our proposed framework outperforms other alternative social event categorization techniques.},
  eventtitle = {Twenty-{{Eighth International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-19}}\vphantom\{\}},
  isbn = {978-0-9992411-4-1},
  langid = {english},
  annotation = {32 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/7NJW8EC5/Peng et al. - 2019 - Fine-grained Event Categorization with Heterogeneo.pdf}
}

@online{pengHyperbolicDeepNeural2021,
  title = {Hyperbolic {{Deep Neural Networks}}: {{A Survey}}},
  shorttitle = {Hyperbolic {{Deep Neural Networks}}},
  author = {Peng, Wei and Varanka, Tuomas and Mostafa, Abdelrahman and Shi, Henglin and Zhao, Guoying},
  date = {2021-02-17},
  eprint = {2101.04562},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2101.04562},
  url = {http://arxiv.org/abs/2101.04562},
  urldate = {2022-12-16},
  abstract = {Recently, there has been a rising surge of momentum for deep representation learning in hyperbolic spaces due to theirhigh capacity of modeling data like knowledge graphs or synonym hierarchies, possessing hierarchical structure. We refer to the model as hyperbolic deep neural network in this paper. Such a hyperbolic neural architecture potentially leads to drastically compact model withmuch more physical interpretability than its counterpart in Euclidean space. To stimulate future research, this paper presents acoherent and comprehensive review of the literature around the neural components in the construction of hyperbolic deep neuralnetworks, as well as the generalization of the leading deep approaches to the Hyperbolic space. It also presents current applicationsaround various machine learning tasks on several publicly available datasets, together with insightful observations and identifying openquestions and promising future directions.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/UQRTSUJV/Peng et al. - 2021 - Hyperbolic Deep Neural Networks A Survey.pdf;/Users/Nasy/Zotero/storage/97A98ALB/2101.html}
}

@article{pengLearningRepresentationsGraphical2023,
  title = {Learning {{Representations}} by {{Graphical Mutual Information Estimation}} and {{Maximization}}},
  author = {Peng, Zhen and Luo, Minnan and Huang, Wenbing and Li, Jundong and Zheng, Qinghua and Sun, Fuchun and Huang, Junzhou},
  date = {2023-01-01},
  journaltitle = {IEEE Trans. Pattern Anal. Mach. Intell.},
  volume = {45},
  number = {1},
  pages = {722--737},
  issn = {0162-8828, 2160-9292, 1939-3539},
  doi = {10.1109/TPAMI.2022.3147886},
  url = {https://ieeexplore.ieee.org/document/9699378/},
  abstract = {The rich content in various real-world networks such as social networks, biological networks, and communication networks provides unprecedented opportunities for unsupervised machine learning on graphs. This paper investigates the fundamental problem of preserving and extracting abundant information from graph-structured data into embedding space without external supervision. To this end, we generalize conventional mutual information computation from vector space to graph domain and present a novel concept, Graphical Mutual Information (GMI), to measure the correlation between input graph and hidden representation. Except for standard GMI which considers graph structures from a local perspective, our further proposed GMI++ additionally captures global topological properties by analyzing the co-occurrence relationship of nodes. GMI and its extension exhibit several benefits: First, they are invariant to the isomorphic transformation of input graphs—an inevitable constraint in many existing methods; Second, they can be efficiently estimated and maximized by current mutual information estimation methods; Lastly, our theoretical analysis confirms their correctness and rationality. With the aid of GMI, we develop an unsupervised embedding model and adapt it to the specific anomaly detection task. Extensive experiments indicate that our GMI methods achieve promising performance in various downstream tasks, such as node classification, link prediction, and anomaly detection.},
  langid = {english},
  annotation = {0 citations (Crossref) [2023-05-26]},
  file = {/Users/Nasy/Zotero/storage/EJWBRVXJ/Peng et al. - 2023 - Learning Representations by Graphical Mutual Infor.pdf}
}

@article{prihodaBioPhiPlatformAntibody2022,
  title = {{{BioPhi}}: {{A}} Platform for Antibody Design, Humanization, and Humanness Evaluation Based on Natural Antibody Repertoires and Deep Learning},
  shorttitle = {{{BioPhi}}},
  author = {Prihoda, David and Maamary, Jad and Waight, Andrew and Juan, Veronica and Fayadat-Dilman, Laurence and Svozil, Daniel and Bitton, Danny A.},
  date = {2022-12-31},
  journaltitle = {mAbs},
  volume = {14},
  number = {1},
  eprint = {35133949},
  eprinttype = {pmid},
  pages = {2020203},
  publisher = {Taylor \& Francis},
  issn = {1942-0862},
  doi = {10.1080/19420862.2021.2020203},
  url = {https://doi.org/10.1080/19420862.2021.2020203},
  abstract = {Despite recent advances in transgenic animal models and display technologies, humanization of mouse sequences remains one of the main routes for therapeutic antibody development. Traditionally, humanization is manual, laborious, and requires expert knowledge. Although automation efforts are advancing, existing methods are either demonstrated on a small scale or are entirely proprietary. To predict the immunogenicity risk, the human-likeness of sequences can be evaluated using existing humanness scores, but these lack diversity, granularity or interpretability. Meanwhile, immune repertoire sequencing has generated rich antibody libraries such as the Observed Antibody Space (OAS) that offer augmented diversity not yet exploited for antibody engineering. Here we present BioPhi, an open-source platform featuring novel methods for humanization (Sapiens) and humanness evaluation (OASis). Sapiens is a deep learning humanization method trained on the OAS using language modeling. Based on an in silico humanization benchmark of 177 antibodies, Sapiens produced sequences at scale while achieving results comparable to that of human experts. OASis is a granular, interpretable and diverse humanness score based on 9-mer peptide search in the OAS. OASis separated human and non-human sequences with high accuracy, and correlated with clinical immunogenicity. BioPhi thus offers an antibody design interface with automated methods that capture the richness of natural antibody repertoires to produce therapeutics with desired properties and accelerate antibody discovery campaigns. The BioPhi platform is accessible at https://biophi.dichlab.org and https://github.com/Merck/BioPhi.},
  keywords = {Antibody humanization,deep learning,deimmunization,human-likeness,humanness,immune repertoires,immunogenicity,machine learning},
  annotation = {5 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/HFASE7FT/Prihoda et al. - 2022 - BioPhi A platform for antibody design, humanizati.pdf;/Users/Nasy/Zotero/storage/X7MFVRQN/19420862.2021.html}
}

@inproceedings{qianDistillingMetaKnowledge2021,
  title = {Distilling {{Meta Knowledge}} on {{Heterogeneous Graph}} for {{Illicit Drug Trafficker Detection}} on {{Social Media}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Qian, Yiyue and Zhang, Yiming and Ye, Yanfang (Fa and Zhang, Chuxu},
  date = {2021},
  volume = {34},
  pages = {26911--26923},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2021/hash/e234e195f3789f05483378c397db1cb5-Abstract.html},
  urldate = {2022-05-26},
  abstract = {Driven by the considerable profits, the crime of drug trafficking (a.k.a. illicit drug trading) has co-evolved with modern technologies, e.g., social media such as Instagram has become a popular platform for marketing and selling illicit drugs. The activities of online drug trafficking are nimble and resilient, which call for novel techniques to effectively detect, disrupt, and dismantle illicit drug trades. In this paper, we propose a holistic framework named MetaHG to automatically detect illicit drug traffickers on social media (i.e., Instagram), by tackling the following two new challenges: (1) different from existing works which merely focus on analyzing post content, MetaHG is capable of jointly modeling multi-modal content and relational structured information on social media for illicit drug trafficker detection; (2) in addition, through the proposed meta-learning technique, MetaHG addresses the issue of requiring sufficient data for model training. More specifically, in our proposed MetaHG, we first build a heterogeneous graph (HG) to comprehensively characterize the complex ecosystem of drug trafficking on social media.  Then, we employ a relation-based graph convolutional neural network to learn node (i.e., user) representations over the built HG, in which we introduce graph structure refinement to compensate the sparse connection among entities in the HG for more robust node representation learning. Afterwards, we propose a meta-learning algorithm for model optimization. A self-supervised module and a knowledge distillation module are further designed to exploit unlabeled data for improving the model. Extensive experiments based on the real-world data collected from Instagram demonstrate that the proposed MetaHG outperforms state-of-the-art methods.},
  file = {/Users/Nasy/Zotero/storage/A6IXXW2Z/Qian et al. - 2021 - Distilling Meta Knowledge on Heterogeneous Graph f.pdf}
}

@article{raghavaQuantificationVariationPercentage2006,
  title = {Quantification of the Variation in Percentage Identity for Protein Sequence Alignments},
  author = {Raghava, GPS and Barton, Geoffrey J.},
  date = {2006-09-19},
  journaltitle = {BMC Bioinformatics},
  volume = {7},
  number = {1},
  pages = {415},
  issn = {1471-2105},
  doi = {10.1186/1471-2105-7-415},
  url = {https://doi.org/10.1186/1471-2105-7-415},
  urldate = {2022-08-19},
  abstract = {Percentage Identity (PID) is frequently quoted in discussion of sequence alignments since it appears simple and easy to understand. However, although there are several different ways to calculate percentage identity and each may yield a different result for the same alignment, the method of calculation is rarely reported. Accordingly, quantification of the variation in PID caused by the different calculations would help in interpreting PID values in the literature. In this study, the variation in PID was quantified systematically on a reference set of 1028 alignments generated by comparison of the protein three-dimensional structures. Since the alignment algorithm may also affect the range of PID, this study also considered the effect of algorithm, and the combination of algorithm and PID method.},
  keywords = {Alignment Algorithm,Alignment Method,Percentage Identity,Reference Alignment,Structural Alignment},
  annotation = {35 citations (Crossref) [2022-08-19]},
  file = {/Users/Nasy/Zotero/storage/BDPRKDUJ/Raghava and Barton - 2006 - Quantification of the variation in percentage iden.pdf;/Users/Nasy/Zotero/storage/J4MGCD92/1471-2105-7-415.html}
}

@online{raissiPhysicsInformedDeep2017,
  title = {Physics {{Informed Deep Learning}} ({{Part I}}): {{Data-driven Solutions}} of {{Nonlinear Partial Differential Equations}}},
  shorttitle = {Physics {{Informed Deep Learning}} ({{Part I}})},
  author = {Raissi, Maziar and Perdikaris, Paris and Karniadakis, George Em},
  date = {2017-11-28},
  eprint = {1711.10561},
  eprinttype = {arXiv},
  url = {http://arxiv.org/abs/1711.10561},
  abstract = {We introduce physics informed neural networks -- neural networks that are trained to solve supervised learning tasks while respecting any given law of physics described by general nonlinear partial differential equations. In this two part treatise, we present our developments in the context of solving two main classes of problems: data-driven solution and data-driven discovery of partial differential equations. Depending on the nature and arrangement of the available data, we devise two distinct classes of algorithms, namely continuous time and discrete time models. The resulting neural networks form a new class of data-efficient universal function approximators that naturally encode any underlying physical laws as prior information. In this first part, we demonstrate how these networks can be used to infer solutions to partial differential equations, and obtain physics-informed surrogate models that are fully differentiable with respect to all input coordinates and free parameters.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Mathematics - Dynamical Systems,Mathematics - Numerical Analysis,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/SATZKTQD/Raissi et al. - 2017 - Physics Informed Deep Learning (Part I) Data-driv.pdf;/Users/Nasy/Zotero/storage/8276FER2/1711.html}
}

@online{raissiPhysicsInformedDeep2017a,
  title = {Physics {{Informed Deep Learning}} ({{Part II}}): {{Data-driven Discovery}} of {{Nonlinear Partial Differential Equations}}},
  shorttitle = {Physics {{Informed Deep Learning}} ({{Part II}})},
  author = {Raissi, Maziar and Perdikaris, Paris and Karniadakis, George Em},
  date = {2017-11-28},
  eprint = {1711.10566},
  eprinttype = {arXiv},
  url = {http://arxiv.org/abs/1711.10566},
  abstract = {We introduce physics informed neural networks -- neural networks that are trained to solve supervised learning tasks while respecting any given law of physics described by general nonlinear partial differential equations. In this second part of our two-part treatise, we focus on the problem of data-driven discovery of partial differential equations. Depending on whether the available data is scattered in space-time or arranged in fixed temporal snapshots, we introduce two main classes of algorithms, namely continuous time and discrete time models. The effectiveness of our approach is demonstrated using a wide range of benchmark problems in mathematical physics, including conservation laws, incompressible fluid flow, and the propagation of nonlinear shallow-water waves.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Mathematics - Analysis of PDEs,Mathematics - Numerical Analysis,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/8JFZPRNK/Raissi et al. - 2017 - Physics Informed Deep Learning (Part II) Data-dri.pdf;/Users/Nasy/Zotero/storage/TWRBP6HM/1711.html}
}

@article{raissiPhysicsinformedNeuralNetworks2019,
  title = {Physics-Informed Neural Networks: {{A}} Deep Learning Framework for Solving Forward and Inverse Problems Involving Nonlinear Partial Differential Equations},
  shorttitle = {Physics-Informed Neural Networks},
  author = {Raissi, M. and Perdikaris, P. and Karniadakis, G. E.},
  date = {2019-02-01},
  journaltitle = {Journal of Computational Physics},
  volume = {378},
  pages = {686--707},
  issn = {0021-9991},
  doi = {10.1016/j.jcp.2018.10.045},
  url = {https://www.sciencedirect.com/science/article/pii/S0021999118307125},
  abstract = {We introduce physics-informed neural networks – neural networks that are trained to solve supervised learning tasks while respecting any given laws of physics described by general nonlinear partial differential equations. In this work, we present our developments in the context of solving two main classes of problems: data-driven solution and data-driven discovery of partial differential equations. Depending on the nature and arrangement of the available data, we devise two distinct types of algorithms, namely continuous time and discrete time models. The first type of models forms a new family of data-efficient spatio-temporal function approximators, while the latter type allows the use of arbitrarily accurate implicit Runge–Kutta time stepping schemes with unlimited number of stages. The effectiveness of the proposed framework is demonstrated through a collection of classical problems in fluids, quantum mechanics, reaction–diffusion systems, and the propagation of nonlinear shallow-water waves.},
  langid = {english},
  keywords = {Data-driven scientific computing,Machine learning,Nonlinear dynamics,Predictive modeling,Runge–Kutta methods},
  annotation = {1926 citations (Crossref) [2022-10-06]},
  file = {/Users/Nasy/Zotero/storage/UYWIK3WB/Raissi et al. - 2019 - Physics-informed neural networks A deep learning .pdf;/Users/Nasy/Zotero/storage/4AID5NF5/S0021999118307125.html}
}

@article{raoEvaluatingProteinTransfer2019,
  title = {Evaluating {{Protein Transfer Learning}} with {{TAPE}}},
  author = {Rao, Roshan and Bhattacharya, Nicholas and Thomas, Neil and Duan, Yan and Chen, Xi and Canny, John and Abbeel, Pieter and Song, Yun S.},
  date = {2019-12},
  journaltitle = {Adv Neural Inf Process Syst},
  volume = {32},
  eprint = {33390682},
  eprinttype = {pmid},
  pages = {9689--9701},
  issn = {1049-5258},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7774645/},
  urldate = {2022-08-19},
  abstract = {Machine learning applied to protein sequences is an increasingly popular area of research. Semi-supervised learning for proteins has emerged as an important paradigm due to the high cost of acquiring supervised protein labels, but the current literature is fragmented when it comes to datasets and standardized evaluation techniques. To facilitate progress in this field, we introduce the Tasks Assessing Protein Embeddings (TAPE), a set of five biologically relevant semi-supervised learning tasks spread across different domains of protein biology. We curate tasks into specific training, validation, and test splits to ensure that each task tests biologically relevant generalization that transfers to real-life scenarios. We benchmark a range of approaches to semi-supervised protein representation learning, which span recent work as well as canonical sequence learning techniques. We find that self-supervised pretraining is helpful for almost all models on all tasks, more than doubling performance in some cases. Despite this increase, in several cases features learned by self-supervised pretraining still lag behind features extracted by state-of-the-art non-neural techniques. This gap in performance suggests a huge opportunity for innovative architecture design and improved modeling paradigms that better capture the signal in biological sequences. TAPE will help the machine learning community focus effort on scientifically relevant problems. Toward this end, all data and code used to run these experiments are available at https://github.com/songlab-cal/tape.},
  pmcid = {PMC7774645},
  file = {/Users/Nasy/Zotero/storage/MMFVX27X/Rao et al. - 2019 - Evaluating Protein Transfer Learning with TAPE.pdf}
}

@online{rastegariXNORNetImageNetClassification2016,
  title = {{{XNOR-Net}}: {{ImageNet Classification Using Binary Convolutional Neural Networks}}},
  shorttitle = {{{XNOR-Net}}},
  author = {Rastegari, Mohammad and Ordonez, Vicente and Redmon, Joseph and Farhadi, Ali},
  date = {2016-08-02},
  eprint = {1603.05279},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.1603.05279},
  url = {http://arxiv.org/abs/1603.05279},
  abstract = {We propose two efficient approximations to standard convolutional neural networks: Binary-Weight-Networks and XNOR-Networks. In Binary-Weight-Networks, the filters are approximated with binary values resulting in 32x memory saving. In XNOR-Networks, both the filters and the input to convolutional layers are binary. XNOR-Networks approximate convolutions using primarily binary operations. This results in 58x faster convolutional operations and 32x memory savings. XNOR-Nets offer the possibility of running state-of-the-art networks on CPUs (rather than GPUs) in real-time. Our binary networks are simple, accurate, efficient, and work on challenging visual tasks. We evaluate our approach on the ImageNet classification task. The classification accuracy with a Binary-Weight-Network version of AlexNet is only 2.9\% less than the full-precision AlexNet (in top-1 measure). We compare our method with recent network binarization methods, BinaryConnect and BinaryNets, and outperform these methods by large margins on ImageNet, more than 16\% in top-1 accuracy.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computer Vision and Pattern Recognition},
  file = {/Users/Nasy/Zotero/storage/BKFJRHS6/Rastegari et al. - 2016 - XNOR-Net ImageNet Classification Using Binary Con.pdf;/Users/Nasy/Zotero/storage/CWXBI3G6/1603.html}
}

@online{RecommenderSystemsSocial,
  title = {Recommender Systems with Social Regularization | {{Proceedings}} of the Fourth {{ACM}} International Conference on {{Web}} Search and Data Mining},
  url = {https://dl.acm.org/doi/abs/10.1145/1935826.1935877?casa_token=Wuv9WUf2v9cAAAAA:duFZCLfRyoUlbYV9m26wYjDeqzsNjfNeZKr2_dAcDh3t9r3i53aitt9d6FvBp5XagW_8CsVKi6ECQg},
  urldate = {2022-06-05},
  file = {/Users/Nasy/Zotero/storage/RWBA4N2M/1935826.html}
}

@article{rivesBiologicalStructureFunction2021,
  title = {Biological Structure and Function Emerge from Scaling Unsupervised Learning to 250 Million Protein Sequences},
  author = {Rives, Alexander and Meier, Joshua and Sercu, Tom and Goyal, Siddharth and Lin, Zeming and Liu, Jason and Guo, Demi and Ott, Myle and Zitnick, C. Lawrence and Ma, Jerry and Fergus, Rob},
  date = {2021-04-13},
  journaltitle = {Proceedings of the National Academy of Sciences},
  volume = {118},
  number = {15},
  pages = {e2016239118},
  publisher = {Proceedings of the National Academy of Sciences},
  doi = {10.1073/pnas.2016239118},
  url = {https://www.pnas.org/doi/10.1073/pnas.2016239118},
  annotation = {247 citations (Crossref) [2022-08-19]},
  file = {/Users/Nasy/Zotero/storage/IM3VZ3FW/Rives et al. - 2021 - Biological structure and function emerge from scal.pdf}
}

@inproceedings{robinsonContrastiveLearningHard2020,
  title = {Contrastive {{Learning}} with {{Hard Negative Samples}}},
  author = {Robinson, Joshua David and Chuang, Ching-Yao and Sra, Suvrit and Jegelka, Stefanie},
  date = {2020-09-28},
  url = {https://openreview.net/forum?id=CR1XOQ0UTh-},
  urldate = {2022-03-17},
  abstract = {We consider the question: how can you sample good negative examples for contrastive learning? We argue that, as with metric learning, learning contrastive representations benefits from hard...},
  eventtitle = {International {{Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/DPMB76BE/Robinson et al. - 2020 - Contrastive Learning with Hard Negative Samples.pdf;/Users/Nasy/Zotero/storage/RYJ26MBV/forum.html}
}

@unpublished{rongDropEdgeDeepGraph2020,
  title = {{{DropEdge}}: {{Towards Deep Graph Convolutional Networks}} on {{Node Classification}}},
  shorttitle = {{{DropEdge}}},
  author = {Rong, Yu and Huang, Wenbing and Xu, Tingyang and Huang, Junzhou},
  date = {2020-03-12},
  eprint = {1907.10903},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/1907.10903},
  urldate = {2022-03-11},
  abstract = {\textbackslash emph\{Over-fitting\} and \textbackslash emph\{over-smoothing\} are two main obstacles of developing deep Graph Convolutional Networks (GCNs) for node classification. In particular, over-fitting weakens the generalization ability on small dataset, while over-smoothing impedes model training by isolating output representations from the input features with the increase in network depth. This paper proposes DropEdge, a novel and flexible technique to alleviate both issues. At its core, DropEdge randomly removes a certain number of edges from the input graph at each training epoch, acting like a data augmenter and also a message passing reducer. Furthermore, we theoretically demonstrate that DropEdge either reduces the convergence speed of over-smoothing or relieves the information loss caused by it. More importantly, our DropEdge is a general skill that can be equipped with many other backbone models (e.g. GCN, ResGCN, GraphSAGE, and JKNet) for enhanced performance. Extensive experiments on several benchmarks verify that DropEdge consistently improves the performance on a variety of both shallow and deep GCNs. The effect of DropEdge on preventing over-smoothing is empirically visualized and validated as well. Codes are released on\textasciitilde\textbackslash url\{https://github.com/DropEdge/DropEdge\}.},
  keywords = {Computer Science - Machine Learning,Computer Science - Networking and Internet Architecture,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/62RV76HQ/Rong et al. - 2020 - DropEdge Towards Deep Graph Convolutional Network.pdf;/Users/Nasy/Zotero/storage/RVE5DSMK/1907.html}
}

@unpublished{ruanRapidSearchMassive2021,
  title = {Rapid Search for Massive Black Hole Binary Coalescences Using Deep Learning},
  author = {Ruan, Wen-Hong and Wang, He and Liu, Chang and Guo, Zong-Kuan},
  date = {2021-11-29},
  eprint = {2111.14546},
  eprinttype = {arXiv},
  eprintclass = {astro-ph, physics:gr-qc, physics:physics},
  url = {http://arxiv.org/abs/2111.14546},
  urldate = {2021-11-30},
  abstract = {The coalescences of massive black hole binaries (MBHBs) are one of the main targets of space-based gravitational wave observatories. Such gravitational wave sources are expected to be accompanied by electromagnetic emission. Low latency time of gravitational wave searches and accurate sky localization are keys in triggering successful follow-up observations on the electromagnetic counterparts. Here we present a deep learning method for the first time to rapidly search for MBHB signals in the strain data. Our model is capable to process 1-year of data in just several seconds, identifying all MBHB coalescences with no false alarms. We test the performance of our model on the simulated data from the LISA data challenge. We demonstrate that the model shows a robust resistance for a wide range of generalization for MBHB signals. This method is supposed to be an effective approach, which combined the advances of artificial intelligence to open a new pathway for space-based gravitational wave observations.},
  keywords = {Astrophysics - Instrumentation and Methods for Astrophysics,General Relativity and Quantum Cosmology,Physics - Data Analysis Statistics and Probability},
  file = {/Users/Nasy/Zotero/storage/QSXCD23Z/Ruan et al. - 2021 - Rapid search for massive black hole binary coalesc.pdf;/Users/Nasy/Zotero/storage/TCVNJFI4/2111.html}
}

@article{ruffoloAntibodyStructurePrediction2022,
  title = {Antibody Structure Prediction Using Interpretable Deep Learning},
  author = {Ruffolo, Jeffrey A. and Sulam, Jeremias and Gray, Jeffrey J.},
  date = {2022-02-11},
  journaltitle = {PATTER},
  volume = {3},
  number = {2},
  publisher = {Elsevier},
  issn = {2666-3899},
  doi = {10.1016/j.patter.2021.100406},
  url = {https://www.cell.com/patterns/abstract/S2666-3899(21)00280-4},
  langid = {english},
  keywords = {antibody design,deep learning,DSML 3: Development/pre-production: Data science output has been rolled out/validated across multiple domains/problems,model interpretability,protein structure prediction},
  annotation = {16 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/GSIKJP5N/Ruffolo et al. - 2022 - Antibody structure prediction using interpretable .pdf;/Users/Nasy/Zotero/storage/J38CSJG9/S2666-3899(21)00280-4.html}
}

@unpublished{safarzadehInterpretingMachineLearning2022,
  title = {Interpreting a {{Machine Learning Model}} for {{Detecting Gravitational Waves}}},
  author = {Safarzadeh, Mohammadtaher and Khan, Asad and Huerta, E. A. and Wattenberg, Martin},
  date = {2022-02-15},
  eprint = {2202.07399},
  eprinttype = {arXiv},
  eprintclass = {astro-ph, physics:gr-qc},
  url = {http://arxiv.org/abs/2202.07399},
  urldate = {2022-02-16},
  abstract = {We describe a case study of translational research, applying interpretability techniques developed for computer vision to machine learning models used to search for and find gravitational waves. The models we study are trained to detect black hole merger events in non-Gaussian and non-stationary advanced Laser Interferometer Gravitational-wave Observatory (LIGO) data. We produced visualizations of the response of machine learning models when they process advanced LIGO data that contains real gravitational wave signals, noise anomalies, and pure advanced LIGO noise. Our findings shed light on the responses of individual neurons in these machine learning models. Further analysis suggests that different parts of the network appear to specialize in local versus global features, and that this difference appears to be rooted in the branched architecture of the network as well as noise characteristics of the LIGO detectors. We believe efforts to whiten these "black box" models can suggest future avenues for research and help inform the design of interpretable machine learning models for gravitational wave astrophysics.},
  keywords = {Astrophysics - High Energy Astrophysical Phenomena,Computer Science - Artificial Intelligence,General Relativity and Quantum Cosmology},
  file = {/Users/Nasy/Zotero/storage/D2D7UZTZ/Safarzadeh et al. - 2022 - Interpreting a Machine Learning Model for Detectin.pdf;/Users/Nasy/Zotero/storage/I73FSN8D/2202.html}
}

@software{saiyangNasyxxEmacs20232023,
  title = {Nasyxx/Emacs.d: 2023 03 31 Version},
  shorttitle = {Nasyxx/Emacs.d},
  author = {Saiyang, Na and {haasdo95} and Berserk},
  date = {2023-04-01},
  doi = {10.5281/zenodo.7790390},
  url = {https://zenodo.org/record/7790390},
  urldate = {2023-04-01},
  abstract = {My emacs configuration},
  organization = {Zenodo},
  file = {/Users/Nasy/Zotero/storage/5QPYZLAW/Saiyang et al. - 2023 - nasyxxemacs.d 2023 03 31 version.pdf}
}

@inproceedings{sankaradasMassivelyParallelCoprocessor2009,
  title = {A {{Massively Parallel Coprocessor}} for {{Convolutional Neural Networks}}},
  booktitle = {2009 20th {{IEEE International Conference}} on {{Application-specific Systems}}, {{Architectures}} and {{Processors}}},
  author = {Sankaradas, Murugan and Jakkula, Venkata and Cadambi, Srihari and Chakradhar, Srimat and Durdanovic, Igor and Cosatto, Eric and Graf, Hans Peter},
  date = {2009},
  pages = {53--60},
  doi = {10.1109/ASAP.2009.25},
  keywords = {nosource},
  annotation = {147 citations (Crossref) [2022-08-03]}
}

@unpublished{sarkarIdentificationGranddesignFlocculent2022,
  title = {Identification of {{Grand-design}} and {{Flocculent Spirals}} from {{SDSS}} Using {{Convolutional Neural}} Network},
  author = {Sarkar, S. and Narayanan, G. and Banerjee, A. and Prakash, P.},
  date = {2022-05-18},
  eprint = {2205.08733},
  eprinttype = {arXiv},
  eprintclass = {astro-ph},
  publisher = {arXiv},
  doi = {10.48550/arXiv.2205.08733},
  url = {http://arxiv.org/abs/2205.08733},
  urldate = {2022-05-19},
  abstract = {Spiral galaxies can be classified into the Grand-designs and Flocculents, based on the nature of their spiral arms. The Grand-designs exhibit almost continuous, high contrast spiral arms and are believed to be driven by density waves; the Flocculents, on the other hand, have patchy or discontinuous spiral features and are mostly stochastic in origin. We construct a convolutional neural network (CNN) model that classifies spirals into Grand-designs and Flocculents, with a testing accuracy of \$\textbackslash mathrm\{97.2\textbackslash\%\}\$. We then use the above model for classifying \$\textbackslash mathrm\{1,220\}\$ new spirals from the SDSS. Out of these, \$\textbackslash mathrm\{721\}\$ are identified as Flocculents, the rest being Grand-designs. The mean asymptotic rotational velocity of our sample Grand-designs and Flocculents are \$\textbackslash mathrm\{218 \textbackslash; km \textbackslash; s\textasciicircum\{-1\}\}\$ and \$\textbackslash mathrm\{145 \textbackslash; km \textbackslash; s\textasciicircum\{-1\}\}\$ respectively, while their respective de Vaucouleur morphological type indices are \$\textbackslash mathrm\{2.6\}\$ and \$\textbackslash mathrm\{4.7\}\$. This possibly indicates that Grand-designs are mostly ordinary high surface brightness galaxies like our Milky Way, while Flocculents are intermediate-mass low surface brightness galaxies.},
  keywords = {Astrophysics - Astrophysics of Galaxies},
  file = {/Users/Nasy/Zotero/storage/96WI5YT8/Sarkar et al. - 2022 - Identification of Grand-design and Flocculent Spir.pdf;/Users/Nasy/Zotero/storage/53LLEW6F/2205.html}
}

@inproceedings{saunshiTheoreticalAnalysisContrastive2019,
  title = {A {{Theoretical Analysis}} of {{Contrastive Unsupervised Representation Learning}}},
  booktitle = {Proceedings of the 36th {{International Conference}} on {{Machine Learning}}},
  author = {Saunshi, Nikunj and Plevrakis, Orestis and Arora, Sanjeev and Khodak, Mikhail and Khandeparkar, Hrishikesh},
  date = {2019-05-24},
  pages = {5628--5637},
  publisher = {PMLR},
  issn = {2640-3498},
  url = {https://proceedings.mlr.press/v97/saunshi19a.html},
  urldate = {2022-03-17},
  abstract = {Recent empirical works have successfully used unlabeled data to learn feature representations that are broadly useful in downstream classification tasks. Several of these methods are reminiscent of the well-known word2vec embedding algorithm: leveraging availability of pairs of semantically “similar" data points and “negative samples," the learner forces the inner product of representations of similar pairs with each other to be higher on average than with negative samples. The current paper uses the term contrastive learning for such algorithms and presents a theoretical framework for analyzing them by introducing latent classes and hypothesizing that semantically similar points are sampled from the same latent class. This framework allows us to show provable guarantees on the performance of the learned representations on the average classification task that is comprised of a subset of the same set of latent classes. Our generalization bound also shows that learned representations can reduce (labeled) sample complexity on downstream tasks. We conduct controlled experiments in both the text and image domains to support the theory.},
  eventtitle = {International {{Conference}} on {{Machine Learning}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/CZRFYMV2/Saunshi et al. - 2019 - A Theoretical Analysis of Contrastive Unsupervised.pdf;/Users/Nasy/Zotero/storage/R3PSC7JM/Saunshi et al. - 2019 - A Theoretical Analysis of Contrastive Unsupervised.pdf}
}

@online{schaferMLGWSC1FirstMachine2022,
  title = {{{MLGWSC-1}}: {{The}} First {{Machine Learning Gravitational-Wave Search Mock Data Challenge}}},
  shorttitle = {{{MLGWSC-1}}},
  author = {Schäfer, Marlin B. and Zelenka, Ondřej and Nitz, Alexander H. and Wang, He and Wu, Shichao and Guo, Zong-Kuan and Cao, Zhoujian and Ren, Zhixiang and Nousi, Paraskevi and Stergioulas, Nikolaos and Iosif, Panagiotis and Koloniari, Alexandra E. and Tefas, Anastasios and Passalis, Nikolaos and Salemi, Francesco and Vedovato, Gabriele and Klimenko, Sergey and Mishra, Tanmaya and Brügmann, Bernd and Cuoco, Elena and Huerta, E. A. and Messenger, Chris and Ohme, Frank},
  date = {2022-09-22},
  eprint = {2209.11146},
  eprinttype = {arXiv},
  eprintclass = {astro-ph, physics:gr-qc},
  doi = {10.48550/arXiv.2209.11146},
  url = {http://arxiv.org/abs/2209.11146},
  urldate = {2022-09-24},
  abstract = {We present the results of the first Machine Learning Gravitational-Wave Search Mock Data Challenge (MLGWSC-1). For this challenge, participating groups had to identify gravitational-wave signals from binary black hole mergers of increasing complexity and duration embedded in progressively more realistic noise. The final of the 4 provided datasets contained real noise from the O3a observing run and signals up to a duration of 20 seconds with the inclusion of precession effects and higher order modes. We present the average sensitivity distance and runtime for the 6 entered algorithms derived from 1 month of test data unknown to the participants prior to submission. Of these, 4 are machine learning algorithms. We find that the best machine learning based algorithms are able to achieve up to 95\% of the sensitive distance of matched-filtering based production analyses for simulated Gaussian noise at a false-alarm rate (FAR) of one per month. In contrast, for real noise, the leading machine learning search achieved 70\%. For higher FARs the differences in sensitive distance shrink to the point where select machine learning submissions outperform traditional search algorithms at FARs \$\textbackslash geq 200\$ per month on some datasets. Our results show that current machine learning search algorithms may already be sensitive enough in limited parameter regions to be useful for some production settings. To improve the state-of-the-art, machine learning algorithms need to reduce the false-alarm rates at which they are capable of detecting signals and extend their validity to regions of parameter space where modeled searches are computationally expensive to run. Based on our findings we compile a list of research areas that we believe are the most important to elevate machine learning searches to an invaluable tool in gravitational-wave signal detection.},
  pubstate = {prepublished},
  keywords = {Astrophysics - High Energy Astrophysical Phenomena,Astrophysics - Instrumentation and Methods for Astrophysics,Computer Science - Machine Learning,General Relativity and Quantum Cosmology},
  file = {/Users/Nasy/Zotero/storage/3YN42JE3/Schäfer et al. - 2022 - MLGWSC-1 The first Machine Learning Gravitational.pdf;/Users/Nasy/Zotero/storage/NCYBRAYI/2209.html}
}

@inproceedings{schlichtkrullModelingRelationalData2018,
  title = {Modeling {{Relational Data}} with {{Graph Convolutional Networks}}},
  booktitle = {The {{Semantic Web}}},
  author = {Schlichtkrull, Michael and Kipf, Thomas N. and Bloem, Peter and van~den Berg, Rianne and Titov, Ivan and Welling, Max},
  editor = {Gangemi, Aldo and Navigli, Roberto and Vidal, Maria-Esther and Hitzler, Pascal and Troncy, Raphaël and Hollink, Laura and Tordai, Anna and Alam, Mehwish},
  options = {useprefix=true},
  date = {2018},
  series = {Lecture {{Notes}} in {{Computer Science}}},
  pages = {593--607},
  publisher = {Springer International Publishing},
  location = {Cham},
  doi = {10.1007/978-3-319-93417-4_38},
  abstract = {Knowledge graphs enable a wide variety of applications, including question answering and information retrieval. Despite the great effort invested in their creation and maintenance, even the largest (e.g., Yago, DBPedia or Wikidata) remain incomplete. We introduce Relational Graph Convolutional Networks (R-GCNs) and apply them to two standard knowledge base completion tasks: Link prediction (recovery of missing facts, i.e.~subject-predicate-object triples) and entity classification (recovery of missing entity attributes). R-GCNs are related to a recent class of neural networks operating on graphs, and are developed specifically to handle the highly multi-relational data characteristic of realistic knowledge bases. We demonstrate the effectiveness of R-GCNs as a stand-alone model for entity classification. We further show that factorization models for link prediction such as DistMult can be significantly improved through the use of an R-GCN encoder model to accumulate evidence over multiple inference steps in the graph, demonstrating a large improvement of 29.8\% on FB15k-237 over a decoder-only baseline.},
  isbn = {978-3-319-93417-4},
  langid = {english},
  annotation = {641 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/TFLYWAU4/Schlichtkrull et al. - 2018 - Modeling Relational Data with Graph Convolutional .pdf}
}

@article{schmidt-hieberKolmogorovArnoldRepresentation2021,
  title = {The {{Kolmogorov}}–{{Arnold}} Representation Theorem Revisited},
  author = {Schmidt-Hieber, Johannes},
  date = {2021-05-01},
  journaltitle = {Neural Networks},
  volume = {137},
  pages = {119--126},
  issn = {0893-6080},
  doi = {10.1016/j.neunet.2021.01.020},
  url = {https://www.sciencedirect.com/science/article/pii/S0893608021000289},
  urldate = {2024-05-02},
  abstract = {There is a longstanding debate whether the Kolmogorov–Arnold representation theorem can explain the use of more than one hidden layer in neural networks. The Kolmogorov–Arnold representation decomposes a multivariate function into an interior and an outer function and therefore has indeed a similar structure as a neural network with two hidden layers. But there are distinctive differences. One of the main obstacles is that the outer function depends on the represented function and can be wildly varying even if the represented function is smooth. We derive modifications of the Kolmogorov–Arnold representation that transfer smoothness properties of the represented function to the outer function and can be well approximated by ReLU networks. It appears that instead of two hidden layers, a more natural interpretation of the Kolmogorov–Arnold representation is that of a deep neural network where most of the layers are required to approximate the interior function.},
  keywords = {Deep ReLU networks,Function approximation,Kolmogorov–Arnold representation theorem,Space-filling curves},
  file = {/Users/Nasy/Zotero/storage/N7NKX5G4/Schmidt-Hieber - 2021 - The Kolmogorov–Arnold representation theorem revisited.pdf;/Users/Nasy/Zotero/storage/IASFLVRI/S0893608021000289.html}
}

@article{schmidtMachineLearningGravitational2021,
  title = {Machine {{Learning Gravitational Waves}} from {{Binary Black Hole Mergers}}},
  author = {Schmidt, Stefano and Breschi, Matteo and Gamba, Rossella and Pagano, Giulia and Rettegno, Piero and Riemenschneider, Gunnar and Bernuzzi, Sebastiano and Nagar, Alessandro and Del Pozzo, Walter},
  date = {2021-02-25},
  journaltitle = {Phys. Rev. D},
  volume = {103},
  number = {4},
  eprint = {2011.01958},
  eprinttype = {arXiv},
  pages = {043020},
  issn = {2470-0010, 2470-0029},
  doi = {10.1103/PhysRevD.103.043020},
  url = {http://arxiv.org/abs/2011.01958},
  urldate = {2021-03-25},
  abstract = {We apply machine learning methods to build a time-domain model for gravitational waveforms from binary black hole mergers, called mlgw. The dimensionality of the problem is handled by representing the waveform's amplitude and phase using a principal component analysis. We train mlgw on about \$\textbackslash mathcal\{O\}(10\textasciicircum 3)\$ TEOBResumS and SEOBNRv4 effective-one-body waveforms with mass ratios \$q\textbackslash in[1,20]\$ and aligned dimensionless spins \$s\textbackslash in[-0.80,0.95]\$. The resulting models are faithful to the training sets at the \$\{\textbackslash sim\}10\textasciicircum\{-3\}\$ level (averaged on the parameter space). The speed up for a single waveform generation is a factor 10 to 50 (depending on the binary mass and initial frequency) for TEOBResumS and approximately an order of magnitude more for SEOBNRv4. Furthermore, mlgw provides a closed form expression for the waveform and its gradient with respect to the orbital parameters; such an information might be useful for future improvements in GW data analysis. As demonstration of the capabilities of mlgw to perform a full parameter estimation, we re-analyze the public data from the first GW transient catalog (GWTC-1). We find broadly consistent results with previous analyses at a fraction of the cost, although the analysis with spin aligned waveforms gives systematic larger values of the effective spins with respect to previous analyses with precessing waveforms. Since the generation time does not depend on the length of the signal, our model is particularly suitable for the analysis of the long signals that are expected to be detected by third-generation detectors. Future applications include the analysis of waveform systematics and model selection in parameter estimation.},
  issue = {4},
  keywords = {General Relativity and Quantum Cosmology},
  annotation = {12 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/ZATAKTLM/Schmidt et al. - 2021 - Machine Learning Gravitational Waves from Binary B.pdf;/Users/Nasy/Zotero/storage/DWXGE9W5/2011.html}
}

@inproceedings{schroffFaceNetUnifiedEmbedding2015,
  title = {{{FaceNet}}: {{A}} Unified Embedding for Face Recognition and Clustering},
  shorttitle = {{{FaceNet}}},
  booktitle = {2015 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
  author = {Schroff, Florian and Kalenichenko, Dmitry and Philbin, James},
  date = {2015-06},
  pages = {815--823},
  issn = {1063-6919},
  doi = {10.1109/CVPR.2015.7298682},
  abstract = {Despite significant recent advances in the field of face recognition [10, 14, 15, 17], implementing face verification and recognition efficiently at scale presents serious challenges to current approaches. In this paper we present a system, called FaceNet, that directly learns a mapping from face images to a compact Euclidean space where distances directly correspond to a measure offace similarity. Once this space has been produced, tasks such as face recognition, verification and clustering can be easily implemented using standard techniques with FaceNet embeddings asfeature vectors. Our method uses a deep convolutional network trained to directly optimize the embedding itself, rather than an intermediate bottleneck layer as in previous deep learning approaches. To train, we use triplets of roughly aligned matching / non-matching face patches generated using a novel online triplet mining method. The benefit of our approach is much greater representational efficiency: we achieve state-of-the-artface recognition performance using only 128-bytes perface. On the widely used Labeled Faces in the Wild (LFW) dataset, our system achieves a new record accuracy of 99.63\%. On YouTube Faces DB it achieves 95.12\%. Our system cuts the error rate in comparison to the best published result [15] by 30\% on both datasets.},
  eventtitle = {2015 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
  keywords = {Accuracy,Artificial neural networks,Face,Face recognition,Principal component analysis,Standards,Training},
  annotation = {5004 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/U5ADNNSG/Schroff et al. - 2015 - FaceNet A unified embedding for face recognition .pdf}
}

@inproceedings{schultzLearningDistanceMetric2003,
  title = {Learning a {{Distance Metric}} from {{Relative Comparisons}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Schultz, Matthew and Joachims, Thorsten},
  editor = {Thrun, S. and Saul, L. and Schölkopf, B.},
  date = {2003},
  volume = {16},
  publisher = {MIT Press},
  url = {https://proceedings.neurips.cc/paper/2003/file/d3b1fb02964aa64e257f9f26a31f72cf-Paper.pdf},
  keywords = {nosource}
}

@article{schumacherNeoantigensCancerImmunotherapy2015,
  title = {Neoantigens in Cancer Immunotherapy},
  author = {Schumacher, Ton N. and Schreiber, Robert D.},
  date = {2015},
  journaltitle = {Science},
  volume = {348},
  number = {6230},
  pages = {69--74},
  doi = {10.1126/science.aaa4971},
  url = {https://www.science.org/doi/abs/10.1126/science.aaa4971},
  abstract = {The clinical relevance of T cells in the control of a diverse set of human cancers is now beyond doubt. However, the nature of the antigens that allow the immune system to distinguish cancer cells from noncancer cells has long remained obscure. Recent technological innovations have made it possible to dissect the immune response to patient-specific neoantigens that arise as a consequence of tumor-specific mutations, and emerging data suggest that recognition of such neoantigens is a major factor in the activity of clinical immunotherapies. These observations indicate that neoantigen load may form a biomarker in cancer immunotherapy and provide an incentive for the development of novel therapeutic approaches that selectively enhance T cell reactivity against this class of antigens.},
  keywords = {nosource},
  annotation = {2824 citations (Crossref) [2022-08-03]}
}

@article{scottClassificationProteinBindingSites2022,
  title = {Classification of {{Protein-Binding Sites Using}} a {{Spherical Convolutional Neural Network}}},
  author = {Scott, Oliver B. and Gu, Jing and Chan, A.W. Edith},
  date = {2022-11-28},
  journaltitle = {J. Chem. Inf. Model.},
  volume = {62},
  number = {22},
  pages = {5383--5396},
  publisher = {American Chemical Society},
  issn = {1549-9596},
  doi = {10.1021/acs.jcim.2c00832},
  url = {https://doi.org/10.1021/acs.jcim.2c00832},
  urldate = {2022-12-02},
  abstract = {The analysis and comparison of protein-binding sites aid various applications in the drug discovery process, e.g., hit finding, drug repurposing, and polypharmacology. Classification of binding sites has been a hot topic for the past 30 years, and many different methods have been published. The rapid development of machine learning computational algorithms, coupled with the large volume of publicly available protein–ligand 3D structures, makes it possible to apply deep learning techniques in binding site comparison. Our method uses a cutting-edge spherical convolutional neural network based on the DeepSphere architecture to learn global representations of protein-binding sites. The model was trained on TOUGH-C1 and TOUGH-M1 data and validated with the ProSPECCTs datasets. Our results show that our model can (1) perform well in protein-binding site similarity and classification tasks and (2) learn and separate the physicochemical properties of binding sites. Lastly, we tested the model on a set of kinases, where the results show that it is able to cluster the different kinase subfamilies effectively. This example demonstrates the method’s promise for lead hopping within or outside a protein target, directly based on binding site information.},
  annotation = {0 citations (Crossref) [2022-12-02]},
  file = {/Users/Nasy/Zotero/storage/N75MXYMF/Scott et al. - 2022 - Classification of Protein-Binding Sites Using a Sp.pdf;/Users/Nasy/Zotero/storage/HK8EX47E/acs.jcim.html}
}

@article{seniorImprovedProteinStructure2020,
  title = {Improved Protein Structure Prediction Using Potentials from Deep Learning},
  author = {Senior, Andrew W. and Evans, Richard and Jumper, John and Kirkpatrick, James and Sifre, Laurent and Green, Tim and Qin, Chongli and Žídek, Augustin and Nelson, Alexander W. R. and Bridgland, Alex and Penedones, Hugo and Petersen, Stig and Simonyan, Karen and Crossan, Steve and Kohli, Pushmeet and Jones, David T. and Silver, David and Kavukcuoglu, Koray and Hassabis, Demis},
  date = {2020-01},
  journaltitle = {Nature},
  volume = {577},
  number = {7792},
  pages = {706--710},
  publisher = {Nature Publishing Group},
  issn = {1476-4687},
  doi = {10.1038/s41586-019-1923-7},
  url = {https://www.nature.com/articles/s41586-019-1923-7},
  urldate = {2024-10-30},
  abstract = {Protein structure prediction can be used to determine the three-dimensional shape of a protein from its amino acid sequence1. This problem is of fundamental importance as the structure of a protein largely determines its function2; however, protein structures can be difficult to determine experimentally. Considerable progress has recently been made by leveraging genetic information. It is possible to infer which amino acid residues are in contact by analysing covariation in homologous sequences, which aids in the prediction of protein structures3. Here we show that we can train a neural network to make accurate predictions of the distances between pairs of residues, which convey more information about the structure than contact predictions. Using this information, we construct a potential of mean force4 that can accurately describe the shape of a protein. We find that the resulting potential can be optimized by a simple gradient descent algorithm to generate structures without complex sampling procedures. The resulting system, named AlphaFold, achieves high accuracy, even for sequences with fewer homologous sequences. In the recent Critical Assessment of Protein Structure Prediction5 (CASP13)—a blind assessment of the state of the field—AlphaFold created high-accuracy structures (with template modelling (TM) scores6 of 0.7 or higher) for 24 out of 43 free modelling domains, whereas the next best method, which used sampling and contact information, achieved such accuracy for only 14 out of 43 domains. AlphaFold represents a considerable advance in protein-structure prediction. We expect this increased accuracy to enable insights into the function and malfunction of proteins, especially in cases for which no structures for homologous proteins have been experimentally determined7.},
  langid = {english},
  keywords = {Machine learning,Protein structure predictions}
}

@online{SequencebasedPredictionProtein,
  title = {Sequence-Based Prediction of Protein Protein Interaction Using a Deep-Learning Algorithm | {{BMC Bioinformatics}} | {{Full Text}}},
  url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-017-1700-2},
  urldate = {2022-08-19}
}

@online{SequencebasedPredictionProteina,
  title = {Sequence-Based Prediction of Protein Protein Interaction Using a Deep-Learning Algorithm - {{PubMed}}},
  url = {https://pubmed.ncbi.nlm.nih.gov/28545462/},
  urldate = {2022-08-19},
  file = {/Users/Nasy/Zotero/storage/QP9AK2G7/28545462.html}
}

@online{SequencebasedPredictionProteinb,
  title = {Sequence-Based Prediction of Protein Protein Interaction Using a Deep-Learning Algorithm - {{PubMed}}},
  url = {https://pubmed.ncbi.nlm.nih.gov/28545462/},
  urldate = {2022-08-19}
}

@online{shallueMeasuringEffectsData2019,
  title = {Measuring the {{Effects}} of {{Data Parallelism}} on {{Neural Network Training}}},
  author = {Shallue, Christopher J. and Lee, Jaehoon and Antognini, Joseph and Sohl-Dickstein, Jascha and Frostig, Roy and Dahl, George E.},
  date = {2019-07-18},
  eprint = {1811.03600},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1811.03600},
  url = {http://arxiv.org/abs/1811.03600},
  abstract = {Recent hardware developments have dramatically increased the scale of data parallelism available for neural network training. Among the simplest ways to harness next-generation hardware is to increase the batch size in standard mini-batch neural network training algorithms. In this work, we aim to experimentally characterize the effects of increasing the batch size on training time, as measured by the number of steps necessary to reach a goal out-of-sample error. We study how this relationship varies with the training algorithm, model, and data set, and find extremely large variation between workloads. Along the way, we show that disagreements in the literature on how batch size affects model quality can largely be explained by differences in metaparameter tuning and compute budgets at different batch sizes. We find no evidence that larger batch sizes degrade out-of-sample performance. Finally, we discuss the implications of our results on efforts to train neural networks much faster in the future. Our experimental data is publicly available as a database of 71,638,836 loss measurements taken over the course of training for 168,160 individual models across 35 workloads.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/ZSQ34IZX/Shallue et al. - 2019 - Measuring the Effects of Data Parallelism on Neura.pdf;/Users/Nasy/Zotero/storage/IA8HYFYF/1811.html}
}

@online{shazeerGLUVariantsImprove2020,
  title = {{{GLU Variants Improve Transformer}}},
  author = {Shazeer, Noam},
  date = {2020-02-12},
  eprint = {2002.05202},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.2002.05202},
  url = {http://arxiv.org/abs/2002.05202},
  urldate = {2023-05-19},
  abstract = {Gated Linear Units (arXiv:1612.08083) consist of the component-wise product of two linear projections, one of which is first passed through a sigmoid function. Variations on GLU are possible, using different nonlinear (or even linear) functions in place of sigmoid. We test these variants in the feed-forward sublayers of the Transformer (arXiv:1706.03762) sequence-to-sequence model, and find that some of them yield quality improvements over the typically-used ReLU or GELU activations.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Statistics - Machine Learning}
}

@online{shazeerGLUVariantsImprove2020a,
  title = {{{GLU Variants Improve Transformer}}},
  author = {Shazeer, Noam},
  date = {2020-02-12},
  eprint = {2002.05202},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.2002.05202},
  url = {http://arxiv.org/abs/2002.05202},
  abstract = {Gated Linear Units (arXiv:1612.08083) consist of the component-wise product of two linear projections, one of which is first passed through a sigmoid function. Variations on GLU are possible, using different nonlinear (or even linear) functions in place of sigmoid. We test these variants in the feed-forward sublayers of the Transformer (arXiv:1706.03762) sequence-to-sequence model, and find that some of them yield quality improvements over the typically-used ReLU or GELU activations.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/AB6YT5KV/Shazeer - 2020 - GLU Variants Improve Transformer.pdf;/Users/Nasy/Zotero/storage/AX7NV82A/2002.html}
}

@inproceedings{shenDenoisingGravitationalWaves2019,
  title = {Denoising {{Gravitational Waves}} with {{Enhanced Deep Recurrent Denoising Auto-encoders}}},
  booktitle = {{{ICASSP}} 2019 - 2019 {{IEEE International Conference}} on {{Acoustics}}, {{Speech}} and {{Signal Processing}} ({{ICASSP}})},
  author = {Shen, Hongyu and George, Daniel and Huerta, Eliu. A. and Zhao, Zhizhen},
  date = {2019-05},
  pages = {3237--3241},
  issn = {2379-190X},
  doi = {10.1109/ICASSP.2019.8683061},
  abstract = {Denoising of time domain data is a crucial task for many applications such as communication, translation, virtual assistants etc. For this task, a combination of a recurrent neural net (RNNs) with a Denoising Auto-Encoder (DAEs) has shown promising results. However, this combined model is challenged when operating with low signal-to-noise ratio (SNR) data embedded in non-Gaussian and non-stationary noise. To address this issue, we design a novel model, referred to as `Enhanced Deep Recurrent Denoising Auto-Encoder' (EDR-DAE), that incorporates a signal amplifier layer, and applies curriculum learning by first denoising high SNR signals, before gradually decreasing the SNR until the signals become noise dominated. We showcase the performance of EDR-DAE using time-series data that describes gravitational waves embedded in very noisy backgrounds. In addition, we show that EDRDAE can accurately denoise signals whose topology is significantly more complex than those used for training, demonstrating that our model generalizes to new classes of gravitational waves that are beyond the scope of established denoising algorithms.},
  eventtitle = {{{ICASSP}} 2019 - 2019 {{IEEE International Conference}} on {{Acoustics}}, {{Speech}} and {{Signal Processing}} ({{ICASSP}})},
  keywords = {Data models,denoising algorithm,gravitational waves,Noise measurement,Noise reduction,RNN,Signal to noise ratio,time series,Time series analysis,Training},
  annotation = {13 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/ZRV8I9V7/Shen et al. - 2019 - Denoising Gravitational Waves with Enhanced Deep R.pdf;/Users/Nasy/Zotero/storage/NTQR4EEC/8683061.html}
}

@article{sherstinskyFundamentalsRecurrentNeural2020,
  title = {Fundamentals of {{Recurrent Neural Network}} ({{RNN}}) and {{Long Short-Term Memory}} ({{LSTM}}) Network},
  author = {Sherstinsky, Alex},
  date = {2020-03-01},
  journaltitle = {Physica D: Nonlinear Phenomena},
  volume = {404},
  pages = {132306},
  issn = {0167-2789},
  doi = {10.1016/j.physd.2019.132306},
  url = {https://www.sciencedirect.com/science/article/pii/S0167278919305974},
  urldate = {2024-10-30},
  abstract = {Because of their effectiveness in broad practical applications, LSTM networks have received a wealth of coverage in scientific journals, technical blogs, and implementation guides. However, in most articles, the inference formulas for the LSTM network and its parent, RNN, are stated axiomatically, while the training formulas are omitted altogether. In addition, the technique of “unrolling” an RNN is routinely presented without justification throughout the literature. The goal of this tutorial is to explain the essential RNN and LSTM fundamentals in a single document. Drawing from concepts in Signal Processing, we formally derive the canonical RNN formulation from differential equations. We then propose and prove a precise statement, which yields the RNN unrolling technique. We also review the difficulties with training the standard RNN and address them by transforming the RNN into the “Vanilla LSTM”1 1The nickname “Vanilla LSTM” symbolizes this model’s flexibility and generality (Greff et~al., 2015). network through a series of logical arguments. We provide all equations pertaining to the LSTM system together with detailed descriptions of its constituent entities. Albeit unconventional, our choice of notation and the method for presenting the LSTM system emphasizes ease of understanding. As part of the analysis, we identify new opportunities to enrich the LSTM system and incorporate these extensions into the Vanilla LSTM network, producing the most general LSTM variant to date. The target reader has already been exposed to RNNs and LSTM networks through numerous available resources and is open to an alternative pedagogical approach. A Machine Learning practitioner seeking guidance for implementing our new augmented LSTM model in software for experimentation and research will find the insights and derivations in this treatise valuable as well.},
  keywords = {Convolutional input context windows,External input gate,LSTM,RNN,RNN unfolding/unrolling},
  file = {/Users/Nasy/Zotero/storage/VPUTM4U6/Sherstinsky - 2020 - Fundamentals of Recurrent Neural Network (RNN) and Long Short-Term Memory (LSTM) network.pdf;/Users/Nasy/Zotero/storage/F4IIT77R/S0167278919305974.html}
}

@article{shiEntitySetExpansion2020,
  title = {Entity Set Expansion in Knowledge Graph: A Heterogeneous Information Network Perspective},
  shorttitle = {Entity Set Expansion in Knowledge Graph},
  author = {Shi, Chuan and Ding, Jiayu and Cao, Xiaohuan and Hu, Linmei and Wu, Bin and Li, Xiaoli},
  date = {2020-09-29},
  journaltitle = {Front. Comput. Sci.},
  volume = {15},
  number = {1},
  pages = {151307},
  issn = {2095-2236},
  doi = {10.1007/s11704-020-9240-8},
  url = {https://doi.org/10.1007/s11704-020-9240-8},
  urldate = {2022-06-05},
  abstract = {Entity set expansion (ESE) aims to expand an entity seed set to obtain more entities which have common properties. ESE is important for many applications such as dictionary construction and query suggestion. Traditional ESE methods relied heavily on the text and Web information of entities. Recently, some ESE methods employed knowledge graphs (KGs) to extend entities. However, they failed to effectively and efficiently utilize the rich semantics contained in a KG and ignored the text information of entities in Wikipedia. In this paper, we model a KG as a heterogeneous information network (HIN) containing multiple types of objects and relations. Fine-grained multi-type meta paths are proposed to capture the hidden relation among seed entities in a KG and thus to retrieve candidate entities. Then we rank the entities according to the meta path based structural similarity. Furthermore, to utilize the text description of entities in Wikipedia, we propose an extended model CoMeSE++ which combines both structural information revealed by a KG and text information in Wikipedia for ESE. Extensive experiments on real-world datasets demonstrate that our model achieves better performance by combining structural and textual information of entities.},
  langid = {english},
  keywords = {entity set expansion,heterogeneous information network,knowledge graph,multi-type meta path},
  annotation = {4 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/62WCG4MW/Shi et al. - 2020 - Entity set expansion in knowledge graph a heterog.pdf}
}

@article{shiHeterogeneousInformationNetwork2019,
  title = {Heterogeneous {{Information Network Embedding}} for {{Recommendation}}},
  author = {Shi, Chuan and Hu, Binbin and Zhao, Wayne Xin and Yu, Philip S.},
  date = {2019-02-01},
  journaltitle = {IEEE Trans. on Knowl. and Data Eng.},
  volume = {31},
  number = {2},
  pages = {357--370},
  issn = {1041-4347},
  doi = {10.1109/TKDE.2018.2833443},
  url = {https://doi.org/10.1109/TKDE.2018.2833443},
  urldate = {2022-06-05},
  abstract = {Due to the flexibility in modelling data heterogeneity, heterogeneous information network (HIN) has been adopted to characterize complex and heterogeneous auxiliary data in recommender systems, called HIN based recommendation. It is challenging to develop effective methods for HIN based recommendation in both extraction and exploitation of the information from HINs. Most of HIN based recommendation methods rely on path based similarity, which cannot fully mine latent structure features of users and items. In this paper, we propose a novel heterogeneous network embedding based approach for HIN based recommendation, called HERec. To embed HINs, we design a meta-path based random walk strategy to generate meaningful node sequences for network embedding. The learned node embeddings are first transformed by a set of fusion functions, and subsequently integrated into an extended matrix factorization (MF) model. The extended MF model together with fusion functions are jointly optimized for the rating prediction task. Extensive experiments on three real-world datasets demonstrate the effectiveness of the HERec model. Moreover, we show the capability of the HERec model for the cold-start problem, and reveal that the transformed embedding information from HINs can improve the recommendation performance.},
  annotation = {363 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/FZWQSIWS/Shi et al. - 2019 - Heterogeneous Information Network Embedding for Re.pdf}
}

@online{shimizuHyperbolicNeuralNetworks2021,
  title = {Hyperbolic {{Neural Networks}}++},
  author = {Shimizu, Ryohei and Mukuta, Yusuke and Harada, Tatsuya},
  date = {2021-03-17},
  eprint = {2006.08210},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.2006.08210},
  url = {http://arxiv.org/abs/2006.08210},
  urldate = {2023-03-10},
  abstract = {Hyperbolic spaces, which have the capacity to embed tree structures without distortion owing to their exponential volume growth, have recently been applied to machine learning to better capture the hierarchical nature of data. In this study, we generalize the fundamental components of neural networks in a single hyperbolic geometry model, namely, the Poincar\textbackslash 'e ball model. This novel methodology constructs a multinomial logistic regression, fully-connected layers, convolutional layers, and attention mechanisms under a unified mathematical interpretation, without increasing the parameters. Experiments show the superior parameter efficiency of our methods compared to conventional hyperbolic components, and stability and outperformance over their Euclidean counterparts.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/JL8AEAJX/Shimizu et al. - 2021 - Hyperbolic Neural Networks++.pdf;/Users/Nasy/Zotero/storage/GB4HYAJE/2006.html}
}

@inproceedings{shiSemanticPathBased2015,
  title = {Semantic {{Path}} Based {{Personalized Recommendation}} on {{Weighted Heterogeneous Information Networks}}},
  booktitle = {Proceedings of the 24th {{ACM International}} on {{Conference}} on {{Information}} and {{Knowledge Management}}},
  author = {Shi, Chuan and Zhang, Zhiqiang and Luo, Ping and Yu, Philip S. and Yue, Yading and Wu, Bin},
  date = {2015-10-17},
  series = {{{CIKM}} '15},
  pages = {453--462},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/2806416.2806528},
  url = {https://doi.org/10.1145/2806416.2806528},
  urldate = {2022-06-05},
  abstract = {Recently heterogeneous information network (HIN) analysis has attracted a lot of attention, and many data mining tasks have been exploited on HIN. As an important data mining task, recommender system includes a lot of object types (e.g., users, movies, actors, and interest groups in movie recommendation) and the rich relations among object types, which naturally constitute a HIN. The comprehensive information integration and rich semantic information of HIN make it promising to generate better recommendations. However, conventional HINs do not consider the attribute values on links, and the widely used meta path in HIN may fail to accurately capture semantic relations among objects, due to the existence of rating scores (usually ranging from 1 to 5) between users and items in recommender system. In this paper, we are the first to propose the weighted HIN and weighted meta path concepts to subtly depict the path semantics through distinguishing different link attribute values. Furthermore, we propose a semantic path based personalized recommendation method SemRec to predict the rating scores of users on items. Through setting meta paths, SemRec not only flexibly integrates heterogeneous information but also obtains prioritized and personalized weights representing user preferences on paths. Experiments on two real datasets illustrate that SemRec achieves better recommendation performance through flexibly integrating information with the help of weighted meta paths.},
  isbn = {978-1-4503-3794-6},
  keywords = {heterogeneous information network,meta path,recommendation,similarity},
  annotation = {144 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/DGI4M3XH/Shi et al. - 2015 - Semantic Path based Personalized Recommendation on.pdf}
}

@incollection{shiStateoftheArtHeterogeneousGraph2022,
  title = {The {{State-of-the-Art}} of {{Heterogeneous Graph Representation}}},
  booktitle = {Heterogeneous {{Graph Representation Learning}} and {{Applications}}},
  author = {Shi, Chuan and Wang, Xiao and Yu, Philip S.},
  editor = {Shi, Chuan and Wang, Xiao and Yu, Philip S.},
  date = {2022},
  series = {Artificial {{Intelligence}}: {{Foundations}}, {{Theory}}, and {{Algorithms}}},
  pages = {9--25},
  publisher = {Springer},
  location = {Singapore},
  doi = {10.1007/978-981-16-6166-2_2},
  url = {https://doi.org/10.1007/978-981-16-6166-2_2},
  urldate = {2022-06-05},
  abstract = {In this chapter, we give a comprehensive review of the recent development on heterogeneous graph representation (HGR) methods and techniques. In the method aspect, according to the information used in HGR, existing works are divided into four categories, i.e., structure-preserved HGR, attribute-assisted HGR, dynamic HGR, and application-oriented HGR. In the technique aspect, we summarize five commonly used techniques in HGR and categorize them into shallow model and deep model. In addition, we also provide some public sources, e.g., benchmark datasets, source code, and available tools.},
  isbn = {9789811661662},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/MEMKPXUS/Shi et al. - 2022 - The State-of-the-Art of Heterogeneous Graph Repres.pdf}
}

@article{sirignanoDGMDeepLearning2018,
  title = {{{DGM}}: {{A}} Deep Learning Algorithm for Solving Partial Differential Equations},
  shorttitle = {{{DGM}}},
  author = {Sirignano, Justin and Spiliopoulos, Konstantinos},
  date = {2018-12},
  journaltitle = {Journal of Computational Physics},
  volume = {375},
  eprint = {1708.07469},
  eprinttype = {arXiv},
  eprintclass = {math, q-fin, stat},
  pages = {1339--1364},
  issn = {00219991},
  doi = {10.1016/j.jcp.2018.08.029},
  url = {http://arxiv.org/abs/1708.07469},
  urldate = {2022-10-20},
  abstract = {High-dimensional PDEs have been a longstanding computational challenge. We propose to solve high-dimensional PDEs by approximating the solution with a deep neural network which is trained to satisfy the differential operator, initial condition, and boundary conditions. Our algorithm is meshfree, which is key since meshes become infeasible in higher dimensions. Instead of forming a mesh, the neural network is trained on batches of randomly sampled time and space points. The algorithm is tested on a class of high-dimensional free boundary PDEs, which we are able to accurately solve in up to \$200\$ dimensions. The algorithm is also tested on a high-dimensional Hamilton-Jacobi-Bellman PDE and Burgers' equation. The deep learning algorithm approximates the general solution to the Burgers' equation for a continuum of different boundary conditions and physical conditions (which can be viewed as a high-dimensional space). We call the algorithm a "Deep Galerkin Method (DGM)" since it is similar in spirit to Galerkin methods, with the solution approximated by a neural network instead of a linear combination of basis functions. In addition, we prove a theorem regarding the approximation power of neural networks for a class of quasilinear parabolic PDEs.},
  keywords = {Mathematics - Numerical Analysis,Quantitative Finance - Computational Finance,Quantitative Finance - Mathematical Finance,Statistics - Machine Learning},
  annotation = {505 citations (Crossref) [2022-11-04]},
  file = {/Users/Nasy/Zotero/storage/3U9GHFSH/Sirignano and Spiliopoulos - 2018 - DGM A deep learning algorithm for solving partial.pdf;/Users/Nasy/Zotero/storage/P2W22RX5/1708.html}
}

@online{skopekMixedcurvatureVariationalAutoencoders2020,
  title = {Mixed-Curvature {{Variational Autoencoders}}},
  author = {Skopek, Ondrej and Ganea, Octavian-Eugen and Bécigneul, Gary},
  date = {2020-02-12},
  eprint = {1911.08411},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  doi = {10.48550/arXiv.1911.08411},
  url = {http://arxiv.org/abs/1911.08411},
  urldate = {2023-03-17},
  abstract = {Euclidean geometry has historically been the typical "workhorse" for machine learning applications due to its power and simplicity. However, it has recently been shown that geometric spaces with constant non-zero curvature improve representations and performance on a variety of data types and downstream tasks. Consequently, generative models like Variational Autoencoders (VAEs) have been successfully generalized to elliptical and hyperbolic latent spaces. While these approaches work well on data with particular kinds of biases e.g. tree-like data for a hyperbolic VAE, there exists no generic approach unifying and leveraging all three models. We develop a Mixed-curvature Variational Autoencoder, an efficient way to train a VAE whose latent space is a product of constant curvature Riemannian manifolds, where the per-component curvature is fixed or learnable. This generalizes the Euclidean VAE to curved latent spaces and recovers it when curvatures of all latent space components go to 0.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/W9WA7W8X/Skopek et al. - 2020 - Mixed-curvature Variational Autoencoders.pdf;/Users/Nasy/Zotero/storage/SLT8L3DT/1911.html}
}

@inproceedings{songCBrainDeepLearning2016,
  title = {C-{{Brain}}: {{A}} Deep Learning Accelerator That Tames the Diversity of {{CNNs}} through Adaptive Data-Level Parallelization},
  booktitle = {2016 53nd {{ACM}}/{{EDAC}}/{{IEEE Design Automation Conference}} ({{DAC}})},
  author = {Song, Lili and Wang, Ying and Han, Yinhe and Zhao, Xin and Liu, Bosheng and Li, Xiaowei},
  date = {2016},
  pages = {1--6},
  doi = {10.1145/2897937.2897995},
  keywords = {nosource},
  annotation = {75 citations (Crossref) [2022-08-03]}
}

@unpublished{songDeepMetricLearning2015,
  title = {Deep {{Metric Learning}} via {{Lifted Structured Feature Embedding}}},
  author = {Song, Hyun Oh and Xiang, Yu and Jegelka, Stefanie and Savarese, Silvio},
  date = {2015-11-19},
  eprint = {1511.06452},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/1511.06452},
  urldate = {2022-05-10},
  abstract = {Learning the distance metric between pairs of examples is of great importance for learning and visual recognition. With the remarkable success from the state of the art convolutional neural networks, recent works have shown promising results on discriminatively training the networks to learn semantic feature embeddings where similar examples are mapped close to each other and dissimilar examples are mapped farther apart. In this paper, we describe an algorithm for taking full advantage of the training batches in the neural network training by lifting the vector of pairwise distances within the batch to the matrix of pairwise distances. This step enables the algorithm to learn the state of the art feature embedding by optimizing a novel structured prediction objective on the lifted problem. Additionally, we collected Online Products dataset: 120k images of 23k classes of online products for metric learning. Our experiments on the CUB-200-2011, CARS196, and Online Products datasets demonstrate significant improvement over existing deep feature embedding methods on all experimented embedding sizes with the GoogLeNet network.},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/6BXCLYFA/Song et al. - 2015 - Deep Metric Learning via Lifted Structured Feature.pdf;/Users/Nasy/Zotero/storage/D5RATPRP/1511.html}
}

@article{springerPredictionSpecificTCRPeptide2020,
  title = {Prediction of {{Specific TCR-Peptide Binding From Large Dictionaries}} of {{TCR-Peptide Pairs}}},
  author = {Springer, Ido and Besser, Hanan and Tickotsky-Moskovitz, Nili and Dvorkin, Shirit and Louzoun, Yoram},
  date = {2020},
  journaltitle = {Frontiers in Immunology},
  volume = {11},
  issn = {1664-3224},
  doi = {10.3389/fimmu.2020.01803},
  url = {https://www.frontiersin.org/articles/10.3389/fimmu.2020.01803},
  abstract = {Current sequencing methods allow for detailed samples of T cell receptors (TCR) repertoires. To determine from a repertoire whether its host had been exposed to a target, computational tools that predict TCR-epitope binding are required. Currents tools are based on conserved motifs and are applied to peptides with many known binding TCRs. We employ new Natural Language Processing (NLP) based methods to predict whether any TCR and peptide bind. We combined large-scale TCR-peptide dictionaries with deep learning methods to produce ERGO (pEptide tcR matchinG predictiOn), a highly specific and generic TCR-peptide binding predictor. A set of standard tests are defined for the performance of peptide-TCR binding, including the detection of TCRs binding to a given peptide/antigen, choosing among a set of candidate peptides for a given TCR and determining whether any pair of TCR-peptide bind. ERGO reaches similar results to state of the art methods in these tests even when not trained specifically for each test. The software implementation and data sets are available at https://github.com/louzounlab/ERGO. ERGO is also available through a webserver at: http://tcr.cs.biu.ac.il/.},
  file = {/Users/Nasy/Zotero/storage/HP9MCWAX/Springer et al. - 2020 - Prediction of Specific TCR-Peptide Binding From La.pdf}
}

@book{sternbergLecturesDifferentialGeometry1999,
  title = {Lectures on {{Differential Geometry}}},
  author = {Sternberg, Shlomo},
  date = {1999},
  eprint = {JuYODAAAQBAJ},
  eprinttype = {googlebooks},
  publisher = {American Mathematical Soc.},
  abstract = {This book is based on lectures given at Harvard University during the academic year 1960-1961. The presentation assumes knowledge of the elements of modern algebra (groups, vector spaces, etc.) and point-set topology and some elementary analysis. Rather than giving all the basic information or touching upon every topic in the field, this work treats various selected topics in differential geometry. The author concisely addresses standard material and spreads exercises throughout the text. His reprint has two additions to the original volume: a paper written jointly with V. Guillemin at the beginning of a period of intense interest in the equivalence problem and a short description from the author on results in the field that occurred between the first and the second printings.},
  isbn = {978-0-8218-1385-0},
  langid = {english},
  pagetotal = {466}
}

@inproceedings{suHRecHeterogeneousGraph2019,
  title = {{{HRec}}: {{Heterogeneous Graph Embedding-Based Personalized Point-of-Interest Recommendation}}},
  shorttitle = {{{HRec}}},
  booktitle = {Neural {{Information Processing}}},
  author = {Su, Yijun and Li, Xiang and Zha, Daren and Tang, Wei and Jiang, Yiwen and Xiang, Ji and Gao, Neng},
  editor = {Gedeon, Tom and Wong, Kok Wai and Lee, Minho},
  date = {2019},
  series = {Lecture {{Notes}} in {{Computer Science}}},
  pages = {37--49},
  publisher = {Springer International Publishing},
  location = {Cham},
  doi = {10.1007/978-3-030-36718-3_4},
  abstract = {POI (point-of-interest) recommendation as an important location-based service has been widely utilized in helping people discover attractive locations. A variety of available check-in data provide a good opportunity for developing personalized POI recommender systems. However, the extreme sparsity of check-in data and inefficiency of exploiting unobserved feedback pose severe challenges for POI recommendation. To cope with these challenges, we develop a heterogeneous graph embedding-based personalized POI recommendation framework called HRec. It consists of two modules: the learning module and the ranking module. Specifically, we first propose the learning module to produce a series of intermediate feedback from unobserved feedback by learning the embeddings of users and POIs in the heterogeneous graph. Then we devise the ranking module to recommend each user the ultimate ranked list of relevant POIs by utilizing two pairwise feedback comparisons. Experimental results on two real-world datasets demonstrate the effectiveness and superiority of the proposed method.},
  isbn = {978-3-030-36718-3},
  langid = {english},
  keywords = {Graph embedding,Personalized ranking,POI recommendation},
  annotation = {4 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/3HVXGEFN/Su et al. - 2019 - HRec Heterogeneous Graph Embedding-Based Personal.pdf}
}

@inproceedings{suhStochasticClassBasedHard2019,
  title = {Stochastic {{Class-Based Hard Example Mining}} for {{Deep Metric Learning}}},
  booktitle = {2019 {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
  author = {Suh, Yumin and Han, Bohyung and Kim, Wonsik and Lee, Kyoung Mu},
  date = {2019-06},
  pages = {7244--7252},
  issn = {2575-7075},
  doi = {10.1109/CVPR.2019.00742},
  abstract = {Performance of deep metric learning depends heavily on the capability of mining hard negative examples during training. However, many metric learning algorithms often require intractable computational cost due to frequent feature computations and nearest neighbor searches in a large-scale dataset. As a result, existing approaches often suffer from trade-off between training speed and prediction accuracy. To alleviate this limitation, we propose a stochastic hard negative mining method. Our key idea is to adopt class signatures that keep track of feature embedding online with minor additional cost during training, and identify hard negative example candidates using the signatures. Given an anchor instance, our algorithm first selects a few hard negative classes based on the class-to-sample distances and then performs a refined search in an instance-level only from the selected classes. As most of the classes are discarded at the first step, it is much more efficient than exhaustive search while effectively mining a large number of hard examples. Our experiment shows that the proposed technique improves image retrieval accuracy substantially; it achieves the state-of-the-art performance on the several standard benchmark datasets.},
  eventtitle = {2019 {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
  keywords = {Categorization,Deep Learning,Recognition: Detection,Retrieval},
  annotation = {29 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/MGHIP4AS/8953619.html}
}

@article{sunMiningHeterogeneousInformation,
  title = {Mining {{Heterogeneous Information Networks}}: {{A Structural Analysis Approach}}},
  author = {Sun, Yizhou and Han, Jiawei},
  volume = {14},
  number = {2},
  pages = {9},
  abstract = {Most objects and data in the real world are of multiple types, interconnected, forming complex, heterogeneous but often semi-structured information networks. However, most network science researchers are focused on homogeneous networks, without distinguishing different types of objects and links in the networks. We view interconnected, multityped data, including the typical relational database data, as heterogeneous information networks, study how to leverage the rich semantic meaning of structural types of objects and links in the networks, and develop a structural analysis approach on mining semi-structured, multi-typed heterogeneous information networks. In this article, we summarize a set of methodologies that can effectively and efficiently mine useful knowledge from such information networks, and point out some promising research directions.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/IPWFHVT2/Sun and Han - Mining Heterogeneous Information Networks A Struc.pdf}
}

@article{sunMiningHeterogeneousInformation2012,
  title = {Mining {{Heterogeneous Information Networks}}: {{Principles}} and {{Methodologies}}},
  shorttitle = {Mining {{Heterogeneous Information Networks}}},
  author = {Sun, Yizhou and Han, Jiawei},
  date = {2012-07-18},
  journaltitle = {Synthesis Lectures on Data Mining and Knowledge Discovery},
  volume = {3},
  number = {2},
  pages = {1--159},
  publisher = {Morgan \& Claypool Publishers},
  issn = {2151-0067},
  doi = {10.2200/S00433ED1V01Y201207DMK005},
  url = {https://www.morganclaypool.com/doi/abs/10.2200/S00433ED1V01Y201207DMK005},
  urldate = {2022-05-27},
  keywords = {nosource},
  annotation = {183 citations (Crossref) [2022-08-03]}
}

@article{sunMiningHeterogeneousInformation2012a,
  title = {Mining {{Heterogeneous Information Networks}}: {{Principles}} and {{Methodologies}}},
  shorttitle = {Mining {{Heterogeneous Information Networks}}},
  author = {Sun, Yizhou and Han, Jiawei},
  date = {2012-07-18},
  journaltitle = {Synthesis Lectures on Data Mining and Knowledge Discovery},
  volume = {3},
  number = {2},
  pages = {1--159},
  publisher = {Morgan \& Claypool Publishers},
  issn = {2151-0067},
  doi = {10.2200/S00433ED1V01Y201207DMK005},
  url = {https://www.morganclaypool.com/doi/abs/10.2200/S00433ED1V01Y201207DMK005},
  urldate = {2022-05-27},
  abstract = {Real-world physical and abstract data objects are interconnected, forming gigantic, interconnected networks. By structuring these data objects and interactions between these objects into multiple types, such networks become semi-structured heterogeneous information networks. Most real-world applications that handle big data, including interconnected social media and social networks, scientific, engineering, or medical information systems, online e-commerce systems, and most database systems, can be structured into heterogeneous information networks. Therefore, effective analysis of large-scale heterogeneous information networks poses an interesting but critical challenge. In this book, we investigate the principles and methodologies of mining heterogeneous information networks. Departing from many existing network models that view interconnected data as homogeneous graphs or networks, our semi-structured heterogeneous information network model leverages the rich semantics of typed nodes and links in a network and uncovers surprisingly rich knowledge from the network. This semi-structured heterogeneous network modeling leads to a series of new principles and powerful methodologies for mining interconnected data, including: (1) rank-based clustering and classification; (2) meta-path-based similarity search and mining; (3) relation strength-aware mining, and many other potential developments. This book introduces this new research frontier and points out some promising research directions. Table of Contents: Introduction / Ranking-Based Clustering / Classification of Heterogeneous Information Networks / Meta-Path-Based Similarity Search / Meta-Path-Based Relationship Prediction / Relation Strength-Aware Clustering with Incomplete Attributes / User-Guided Clustering via Meta-Path Selection / Research Frontiers},
  langid = {american},
  annotation = {183 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/IUUYXWTA/Sun and Han - 2012 - Mining Heterogeneous Information Networks Princip.pdf;/Users/Nasy/Zotero/storage/LHSBT2E9/6812698.html}
}

@article{sunPathSimMetaPathbased2011,
  title = {{{PathSim}}: Meta Path-Based Top-{{K}} Similarity Search in Heterogeneous Information Networks},
  shorttitle = {{{PathSim}}},
  author = {Sun, Yizhou and Han, Jiawei and Yan, Xifeng and Yu, Philip S. and Wu, Tianyi},
  date = {2011-08-01},
  journaltitle = {Proc. VLDB Endow.},
  volume = {4},
  number = {11},
  pages = {992--1003},
  issn = {2150-8097},
  doi = {10.14778/3402707.3402736},
  url = {https://doi.org/10.14778/3402707.3402736},
  urldate = {2022-06-07},
  abstract = {Similarity search is a primitive operation in database and Web search engines. With the advent of large-scale heterogeneous information networks that consist of multi-typed, interconnected objects, such as the bibliographic networks and social media networks, it is important to study similarity search in such networks. Intuitively, two objects are similar if they are linked by many paths in the network. However, most existing similarity measures are defined for homogeneous networks. Different semantic meanings behind paths are not taken into consideration. Thus they cannot be directly applied to heterogeneous networks. In this paper, we study similarity search that is defined among the same type of objects in heterogeneous networks. Moreover, by considering different linkage paths in a network, one could derive various similarity semantics. Therefore, we introduce the concept of meta path-based similarity, where a meta path is a path consisting of a sequence of relations defined between different object types (i.e., structural paths at the meta level). No matter whether a user would like to explicitly specify a path combination given sufficient domain knowledge, or choose the best path by experimental trials, or simply provide training examples to learn it, meta path forms a common base for a network-based similarity search engine. In particular, under the meta path framework we define a novel similarity measure called PathSim that is able to find peer objects in the network (e.g., find authors in the similar field and with similar reputation), which turns out to be more meaningful in many scenarios compared with random-walk based similarity measures. In order to support fast online query processing for PathSim queries, we develop an efficient solution that partially materializes short meta paths and then concatenates them online to compute top-k results. Experiments on real data sets demonstrate the effectiveness and efficiency of our proposed paradigm.},
  annotation = {766 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/L3ZCENA4/Sun et al. - 2011 - PathSim meta path-based top-K similarity search i.pdf}
}

@unpublished{sunPredictingSolarFlares2022,
  title = {Predicting {{Solar Flares Using CNN}} and {{LSTM}} on {{Two Solar Cycles}} of {{Active Region Data}}},
  author = {Sun, Zeyu and Bobra, Monica G. and Wang, Xiantong and Wang, Yu and Sun, Hu and Gombosi, Tamas and Chen, Yang and Hero, Alfred},
  date = {2022-04-07},
  eprint = {2204.03710},
  eprinttype = {arXiv},
  eprintclass = {astro-ph},
  url = {http://arxiv.org/abs/2204.03710},
  urldate = {2022-04-11},
  abstract = {We consider the flare prediction problem that distinguishes flare-imminent active regions that produce an M- or X-class flare in the future 24 hours, from quiet active regions that do not produce any flare within \$\textbackslash pm 24\$ hours. Using line-of-sight magnetograms and parameters of active regions in two data products covering Solar Cycle 23 and 24, we train and evaluate two deep learning algorithms -- CNN and LSTM -- and their stacking ensembles. The decisions of CNN are explained using visual attribution methods. We have the following three main findings. (1) LSTM trained on data from two solar cycles achieves significantly higher True Skill Scores (TSS) than that trained on data from a single solar cycle with a confidence level of at least 0.95. (2) On data from Solar Cycle 23, a stacking ensemble that combines predictions from LSTM and CNN using the TSS criterion achieves significantly higher TSS than the "select-best" strategy with a confidence level of at least 0.95. (3) A visual attribution method called Integrated Gradients is able to attribute the CNN's predictions of flares to the emerging magnetic flux in the active region. It also reveals a limitation of CNN as a flare prediction method using line-of-sight magnetograms: it treats the polarity artifact of line-of-sight magnetograms as positive evidence of flares.},
  keywords = {Astrophysics - Solar and Stellar Astrophysics,Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition},
  file = {/Users/Nasy/Zotero/storage/NJ3JM533/Sun et al. - 2022 - Predicting Solar Flares Using CNN and LSTM on Two .pdf;/Users/Nasy/Zotero/storage/F36K2ZCU/2204.html}
}

@article{sunSequencebasedPredictionProtein2017,
  title = {Sequence-Based Prediction of Protein Protein Interaction Using a Deep-Learning Algorithm},
  author = {Sun, Tanlin and Zhou, Bo and Lai, Luhua and Pei, Jianfeng},
  date = {2017-05-25},
  journaltitle = {BMC Bioinformatics},
  volume = {18},
  number = {1},
  pages = {277},
  issn = {1471-2105},
  doi = {10.1186/s12859-017-1700-2},
  url = {https://doi.org/10.1186/s12859-017-1700-2},
  urldate = {2022-08-19},
  abstract = {Protein-protein interactions (PPIs) are critical for many biological processes. It is therefore important to develop accurate high-throughput methods for identifying PPI to better understand protein function, disease occurrence, and therapy design. Though various computational methods for predicting PPI have been developed, their robustness for prediction with external datasets is unknown. Deep-learning algorithms have achieved successful results in diverse areas, but their effectiveness for PPI prediction has not been tested.},
  keywords = {Deep learning,Protein-protein interaction},
  annotation = {177 citations (Crossref) [2022-08-19]},
  file = {/Users/Nasy/Zotero/storage/Q2VH2IKU/Sun et al. - 2017 - Sequence-based prediction of protein protein inter.pdf}
}

@online{suRoFormerEnhancedTransformer2022,
  title = {{{RoFormer}}: {{Enhanced Transformer}} with {{Rotary Position Embedding}}},
  shorttitle = {{{RoFormer}}},
  author = {Su, Jianlin and Lu, Yu and Pan, Shengfeng and Murtadha, Ahmed and Wen, Bo and Liu, Yunfeng},
  date = {2022-08-08},
  eprint = {2104.09864},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2104.09864},
  url = {http://arxiv.org/abs/2104.09864},
  urldate = {2023-05-19},
  abstract = {Position encoding recently has shown effective in the transformer architecture. It enables valuable supervision for dependency modeling between elements at different positions of the sequence. In this paper, we first investigate various methods to integrate positional information into the learning process of transformer-based language models. Then, we propose a novel method named Rotary Position Embedding(RoPE) to effectively leverage the positional information. Specifically, the proposed RoPE encodes the absolute position with a rotation matrix and meanwhile incorporates the explicit relative position dependency in self-attention formulation. Notably, RoPE enables valuable properties, including the flexibility of sequence length, decaying inter-token dependency with increasing relative distances, and the capability of equipping the linear self-attention with relative position encoding. Finally, we evaluate the enhanced transformer with rotary position embedding, also called RoFormer, on various long text classification benchmark datasets. Our experiments show that it consistently overcomes its alternatives. Furthermore, we provide a theoretical analysis to explain some experimental results. RoFormer is already integrated into Huggingface: \textbackslash url\{https://huggingface.co/docs/transformers/model\_doc/roformer\}.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/L9YNHGCY/Su et al. - 2022 - RoFormer Enhanced Transformer with Rotary Positio.pdf;/Users/Nasy/Zotero/storage/LRWIS35S/2104.html}
}

@online{thomasTensorFieldNetworks2018,
  title = {Tensor Field Networks: {{Rotation-}} and Translation-Equivariant Neural Networks for {{3D}} Point Clouds},
  shorttitle = {Tensor Field Networks},
  author = {Thomas, Nathaniel and Smidt, Tess and Kearnes, Steven and Yang, Lusann and Li, Li and Kohlhoff, Kai and Riley, Patrick},
  date = {2018-05-18},
  eprint = {1802.08219},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.1802.08219},
  url = {http://arxiv.org/abs/1802.08219},
  urldate = {2022-11-10},
  abstract = {We introduce tensor field neural networks, which are locally equivariant to 3D rotations, translations, and permutations of points at every layer. 3D rotation equivariance removes the need for data augmentation to identify features in arbitrary orientations. Our network uses filters built from spherical harmonics; due to the mathematical consequences of this filter choice, each layer accepts as input (and guarantees as output) scalars, vectors, and higher-order tensors, in the geometric sense of these terms. We demonstrate the capabilities of tensor field networks with tasks in geometry, physics, and chemistry.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing},
  file = {/Users/Nasy/Zotero/storage/MKJFH9EM/Thomas et al. - 2018 - Tensor field networks Rotation- and translation-e.pdf;/Users/Nasy/Zotero/storage/EEIH7EMQ/1802.html}
}

@article{thorssonImmuneLandscapeCancer2018,
  title = {The {{Immune Landscape}} of {{Cancer}}},
  author = {Thorsson, Vésteinn and Gibbs, David L. and Brown, Scott D. and Wolf, Denise and Bortone, Dante S. and Ou Yang, Tai-Hsien and Porta-Pardo, Eduard and Gao, Galen F. and Plaisier, Christopher L. and Eddy, James A. and Ziv, Elad and Culhane, Aedin C. and Paull, Evan O. and Sivakumar, I.K. Ashok and Gentles, Andrew J. and Malhotra, Raunaq and Farshidfar, Farshad and Colaprico, Antonio and Parker, Joel S. and Mose, Lisle E. and Vo, Nam Sy and Liu, Jianfang and Liu, Yuexin and Rader, Janet and Dhankani, Varsha and Reynolds, Sheila M. and Bowlby, Reanne and Califano, Andrea and Cherniack, Andrew D. and Anastassiou, Dimitris and Bedognetti, Davide and Mokrab, Younes and Newman, Aaron M. and Rao, Arvind and Chen, Ken and Krasnitz, Alexander and Hu, Hai and Malta, Tathiane M. and Noushmehr, Houtan and Pedamallu, Chandra Sekhar and Bullman, Susan and Ojesina, Akinyemi I. and Lamb, Andrew and Zhou, Wanding and Shen, Hui and Choueiri, Toni K. and Weinstein, John N. and Guinney, Justin and Saltz, Joel and Holt, Robert A. and Rabkin, Charles S. and Lazar, Alexander J. and Serody, Jonathan S. and Demicco, Elizabeth G. and Disis, Mary L. and Vincent, Benjamin G. and Shmulevich, Ilya and Caesar-Johnson, Samantha J. and Demchok, John A. and Felau, Ina and Kasapi, Melpomeni and Ferguson, Martin L. and Hutter, Carolyn M. and Sofia, Heidi J. and Tarnuzzer, Roy and Wang, Zhining and Yang, Liming and Zenklusen, Jean C. and Zhang, Jiashan (Julia) and Chudamani, Sudha and Liu, Jia and Lolla, Laxmi and Naresh, Rashi and Pihl, Todd and Sun, Qiang and Wan, Yunhu and Wu, Ye and Cho, Juok and DeFreitas, Timothy and Frazer, Scott and Gehlenborg, Nils and Getz, Gad and Heiman, David I. and Kim, Jaegil and Lawrence, Michael S. and Lin, Pei and Meier, Sam and Noble, Michael S. and Saksena, Gordon and Voet, Doug and Zhang, Hailei and Bernard, Brady and Chambwe, Nyasha and Dhankani, Varsha and Knijnenburg, Theo and Kramer, Roger and Leinonen, Kalle and Liu, Yuexin and Miller, Michael and Reynolds, Sheila and Shmulevich, Ilya and Thorsson, Vesteinn and Zhang, Wei and Akbani, Rehan and Broom, Bradley M. and Hegde, Apurva M. and Ju, Zhenlin and Kanchi, Rupa S. and Korkut, Anil and Li, Jun and Liang, Han and Ling, Shiyun and Liu, Wenbin and Lu, Yiling and Mills, Gordon B. and Ng, Kwok-Shing and Rao, Arvind and Ryan, Michael and Wang, Jing and Weinstein, John N. and Zhang, Jiexin and Abeshouse, Adam and Armenia, Joshua and Chakravarty, Debyani and Chatila, Walid K. and De Bruijn, Ino and Gao, Jianjiong and Gross, Benjamin E. and Heins, Zachary J. and Kundra, Ritika and La, Konnor and Ladanyi, Marc and Luna, Augustin and Nissan, Moriah G. and Ochoa, Angelica and Phillips, Sarah M. and Reznik, Ed and Sanchez-Vega, Francisco and Sander, Chris and Schultz, Nikolaus and Sheridan, Robert and Sumer, S. Onur and Sun, Yichao and Taylor, Barry S. and Wang, Jioajiao and Zhang, Hongxin and Anur, Pavana and Peto, Myron and Spellman, Paul and Benz, Christopher and Stuart, Joshua M. and Wong, Christopher K. and Yau, Christina and Hayes, D. Neil and Parker, Joel S. and Wilkerson, Matthew D. and Ally, Adrian and Balasundaram, Miruna and Bowlby, Reanne and Brooks, Denise and Carlsen, Rebecca and Chuah, Eric and Dhalla, Noreen and Holt, Robert and Jones, Steven J.M. and Kasaian, Katayoon and Lee, Darlene and Ma, Yussanne and Marra, Marco A. and Mayo, Michael and Moore, Richard A. and Mungall, Andrew J. and Mungall, Karen and Robertson, A. Gordon and Sadeghi, Sara and Schein, Jacqueline E. and Sipahimalani, Payal and Tam, Angela and Thiessen, Nina and Tse, Kane and Wong, Tina and Berger, Ashton C. and Beroukhim, Rameen and Cherniack, Andrew D. and Cibulskis, Carrie and Gabriel, Stacey B. and Gao, Galen F. and Ha, Gavin and Meyerson, Matthew and Schumacher, Steven E. and Shih, Juliann and Kucherlapati, Melanie H. and Kucherlapati, Raju S. and Baylin, Stephen and Cope, Leslie and Danilova, Ludmila and Bootwalla, Moiz S. and Lai, Phillip H. and Maglinte, Dennis T. and Van Den Berg, David J. and Weisenberger, Daniel J. and Auman, J. Todd and Balu, Saianand and Bodenheimer, Tom and Fan, Cheng and Hoadley, Katherine A. and Hoyle, Alan P. and Jefferys, Stuart R. and Jones, Corbin D. and Meng, Shaowu and Mieczkowski, Piotr A. and Mose, Lisle E. and Perou, Amy H. and Perou, Charles M. and Roach, Jeffrey and Shi, Yan and Simons, Janae V. and Skelly, Tara and Soloway, Matthew G. and Tan, Donghui and Veluvolu, Umadevi and Fan, Huihui and Hinoue, Toshinori and Laird, Peter W. and Shen, Hui and Zhou, Wanding and Bellair, Michelle and Chang, Kyle and Covington, Kyle and Creighton, Chad J. and Dinh, Huyen and Doddapaneni, HarshaVardhan and Donehower, Lawrence A. and Drummond, Jennifer and Gibbs, Richard A. and Glenn, Robert and Hale, Walker and Han, Yi and Hu, Jianhong and Korchina, Viktoriya and Lee, Sandra and Lewis, Lora and Li, Wei and Liu, Xiuping and Morgan, Margaret and Morton, Donna and Muzny, Donna and Santibanez, Jireh and Sheth, Margi and Shinbrot, Eve and Wang, Linghua and Wang, Min and Wheeler, David A. and Xi, Liu and Zhao, Fengmei and Hess, Julian and Appelbaum, Elizabeth L. and Bailey, Matthew and Cordes, Matthew G. and Ding, Li and Fronick, Catrina C. and Fulton, Lucinda A. and Fulton, Robert S. and Kandoth, Cyriac and Mardis, Elaine R. and McLellan, Michael D. and Miller, Christopher A. and Schmidt, Heather K. and Wilson, Richard K. and Crain, Daniel and Curley, Erin and Gardner, Johanna and Lau, Kevin and Mallery, David and Morris, Scott and Paulauskis, Joseph and Penny, Robert and Shelton, Candace and Shelton, Troy and Sherman, Mark and Thompson, Eric and Yena, Peggy and Bowen, Jay and Gastier-Foster, Julie M. and Gerken, Mark and Leraas, Kristen M. and Lichtenberg, Tara M. and Ramirez, Nilsa C. and Wise, Lisa and Zmuda, Erik and Corcoran, Niall and Costello, Tony and Hovens, Christopher and Carvalho, Andre L. and De Carvalho, Ana C. and Fregnani, José H. and Longatto-Filho, Adhemar and Reis, Rui M. and Scapulatempo-Neto, Cristovam and Silveira, Henrique C.S. and Vidal, Daniel O. and Burnette, Andrew and Eschbacher, Jennifer and Hermes, Beth and Noss, Ardene and Singh, Rosy and Anderson, Matthew L. and Castro, Patricia D. and Ittmann, Michael and Huntsman, David and Kohl, Bernard and Le, Xuan and Thorp, Richard and Andry, Chris and Duffy, Elizabeth R. and Lyadov, Vladimir and Paklina, Oxana and Setdikova, Galiya and Shabunin, Alexey and Tavobilov, Mikhail and McPherson, Christopher and Warnick, Ronald and Berkowitz, Ross and Cramer, Daniel and Feltmate, Colleen and Horowitz, Neil and Kibel, Adam and Muto, Michael and Raut, Chandrajit P. and Malykh, Andrei and Barnholtz-Sloan, Jill S. and Barrett, Wendi and Devine, Karen and Fulop, Jordonna and Ostrom, Quinn T. and Shimmel, Kristen and Wolinsky, Yingli and Sloan, Andrew E. and De Rose, Agostino and Giuliante, Felice and Goodman, Marc and Karlan, Beth Y. and Hagedorn, Curt H. and Eckman, John and Harr, Jodi and Myers, Jerome and Tucker, Kelinda and Zach, Leigh Anne and Deyarmin, Brenda and Hu, Hai and Kvecher, Leonid and Larson, Caroline and Mural, Richard J. and Somiari, Stella and Vicha, Ales and Zelinka, Tomas and Bennett, Joseph and Iacocca, Mary and Rabeno, Brenda and Swanson, Patricia and Latour, Mathieu and Lacombe, Louis and Têtu, Bernard and Bergeron, Alain and McGraw, Mary and Staugaitis, Susan M. and Chabot, John and Hibshoosh, Hanina and Sepulveda, Antonia and Su, Tao and Wang, Timothy and Potapova, Olga and Voronina, Olga and Desjardins, Laurence and Mariani, Odette and Roman-Roman, Sergio and Sastre, Xavier and Stern, Marc-Henri and Cheng, Feixiong and Signoretti, Sabina and Berchuck, Andrew and Bigner, Darell and Lipp, Eric and Marks, Jeffrey and McCall, Shannon and McLendon, Roger and Secord, Angeles and Sharp, Alexis and Behera, Madhusmita and Brat, Daniel J. and Chen, Amy and Delman, Keith and Force, Seth and Khuri, Fadlo and Magliocca, Kelly and Maithel, Shishir and Olson, Jeffrey J. and Owonikoko, Taofeek and Pickens, Alan and Ramalingam, Suresh and Shin, Dong M. and Sica, Gabriel and Van Meir, Erwin G. and Zhang, Hongzheng and Eijckenboom, Wil and Gillis, Ad and Korpershoek, Esther and Looijenga, Leendert and Oosterhuis, Wolter and Stoop, Hans and Van Kessel, Kim E. and Zwarthoff, Ellen C. and Calatozzolo, Chiara and Cuppini, Lucia and Cuzzubbo, Stefania and DiMeco, Francesco and Finocchiaro, Gaetano and Mattei, Luca and Perin, Alessandro and Pollo, Bianca and Chen, Chu and Houck, John and Lohavanichbutr, Pawadee and Hartmann, Arndt and Stoehr, Christine and Stoehr, Robert and Taubert, Helge and Wach, Sven and Wullich, Bernd and Kycler, Witold and Murawa, Dawid and Wiznerowicz, Maciej and Chung, Ki and Edenfield, W. Jeffrey and Martin, Julie and Baudin, Eric and Bubley, Glenn and Bueno, Raphael and De Rienzo, Assunta and Richards, William G. and Kalkanis, Steven and Mikkelsen, Tom and Noushmehr, Houtan and Scarpace, Lisa and Girard, Nicolas and Aymerich, Marta and Campo, Elias and Giné, Eva and Guillermo, Armando López and Van Bang, Nguyen and Hanh, Phan Thi and Phu, Bui Duc and Tang, Yufang and Colman, Howard and Evason, Kimberley and Dottino, Peter R. and Martignetti, John A. and Gabra, Hani and Juhl, Hartmut and Akeredolu, Teniola and Stepa, Serghei and Hoon, Dave and Ahn, Keunsoo and Kang, Koo Jeong and Beuschlein, Felix and Breggia, Anne and Birrer, Michael and Bell, Debra and Borad, Mitesh and Bryce, Alan H. and Castle, Erik and Chandan, Vishal and Cheville, John and Copland, John A. and Farnell, Michael and Flotte, Thomas and Giama, Nasra and Ho, Thai and Kendrick, Michael and Kocher, Jean-Pierre and Kopp, Karla and Moser, Catherine and Nagorney, David and O’Brien, Daniel and O’Neill, Brian Patrick and Patel, Tushar and Petersen, Gloria and Que, Florencia and Rivera, Michael and Roberts, Lewis and Smallridge, Robert and Smyrk, Thomas and Stanton, Melissa and Thompson, R. Houston and Torbenson, Michael and Yang, Ju Dong and Zhang, Lizhi and Brimo, Fadi and Ajani, Jaffer A. and Gonzalez, Ana Maria Angulo and Behrens, Carmen and Bondaruk, Jolanta and Broaddus, Russell and Czerniak, Bogdan and Esmaeli, Bita and Fujimoto, Junya and Gershenwald, Jeffrey and Guo, Charles and Lazar, Alexander J. and Logothetis, Christopher and Meric-Bernstam, Funda and Moran, Cesar and Ramondetta, Lois and Rice, David and Sood, Anil and Tamboli, Pheroze and Thompson, Timothy and Troncoso, Patricia and Tsao, Anne and Wistuba, Ignacio and Carter, Candace and Haydu, Lauren and Hersey, Peter and Jakrot, Valerie and Kakavand, Hojabr and Kefford, Richard and Lee, Kenneth and Long, Georgina and Mann, Graham and Quinn, Michael and Saw, Robyn and Scolyer, Richard and Shannon, Kerwin and Spillane, Andrew and Stretch, Onathan and Synott, Maria and Thompson, John and Wilmott, James and Al-Ahmadie, Hikmat and Chan, Timothy A. and Ghossein, Ronald and Gopalan, Anuradha and Levine, Douglas A. and Reuter, Victor and Singer, Samuel and Singh, Bhuvanesh and Tien, Nguyen Viet and Broudy, Thomas and Mirsaidi, Cyrus and Nair, Praveen and Drwiega, Paul and Miller, Judy and Smith, Jennifer and Zaren, Howard and Park, Joong-Won and Hung, Nguyen Phi and Kebebew, Electron and Linehan, W. Marston and Metwalli, Adam R. and Pacak, Karel and Pinto, Peter A. and Schiffman, Mark and Schmidt, Laura S. and Vocke, Cathy D. and Wentzensen, Nicolas and Worrell, Robert and Yang, Hannah and Moncrieff, Marc and Goparaju, Chandra and Melamed, Jonathan and Pass, Harvey and Botnariuc, Natalia and Caraman, Irina and Cernat, Mircea and Chemencedji, Inga and Clipca, Adrian and Doruc, Serghei and Gorincioi, Ghenadie and Mura, Sergiu and Pirtac, Maria and Stancul, Irina and Tcaciuc, Diana and Albert, Monique and Alexopoulou, Iakovina and Arnaout, Angel and Bartlett, John and Engel, Jay and Gilbert, Sebastien and Parfitt, Jeremy and Sekhon, Harman and Thomas, George and Rassl, Doris M. and Rintoul, Robert C. and Bifulco, Carlo and Tamakawa, Raina and Urba, Walter and Hayward, Nicholas and Timmers, Henri and Antenucci, Anna and Facciolo, Francesco and Grazi, Gianluca and Marino, Mirella and Merola, Roberta and De Krijger, Ronald and Gimenez-Roqueplo, Anne-Paule and Piché, Alain and Chevalier, Simone and McKercher, Ginette and Birsoy, Kivanc and Barnett, Gene and Brewer, Cathy and Farver, Carol and Naska, Theresa and Pennell, Nathan A. and Raymond, Daniel and Schilero, Cathy and Smolenski, Kathy and Williams, Felicia and Morrison, Carl and Borgia, Jeffrey A. and Liptay, Michael J. and Pool, Mark and Seder, Christopher W. and Junker, Kerstin and Omberg, Larsson and Dinkin, Mikhail and Manikhas, George and Alvaro, Domenico and Bragazzi, Maria Consiglia and Cardinale, Vincenzo and Carpino, Guido and Gaudio, Eugenio and Chesla, David and Cottingham, Sandra and Dubina, Michael and Moiseenko, Fedor and Dhanasekaran, Renumathy and Becker, Karl-Friedrich and Janssen, Klaus-Peter and Slotta-Huspenina, Julia and Abdel-Rahman, Mohamed H. and Aziz, Dina and Bell, Sue and Cebulla, Colleen M. and Davis, Amy and Duell, Rebecca and Elder, J. Bradley and Hilty, Joe and Kumar, Bahavna and Lang, James and Lehman, Norman L. and Mandt, Randy and Nguyen, Phuong and Pilarski, Robert and Rai, Karan and Schoenfield, Lynn and Senecal, Kelly and Wakely, Paul and Hansen, Paul and Lechan, Ronald and Powers, James and Tischler, Arthur and Grizzle, William E. and Sexton, Katherine C. and Kastl, Alison and Henderson, Joel and Porten, Sima and Waldmann, Jens and Fassnacht, Martin and Asa, Sylvia L. and Schadendorf, Dirk and Couce, Marta and Graefen, Markus and Huland, Hartwig and Sauter, Guido and Schlomm, Thorsten and Simon, Ronald and Tennstedt, Pierre and Olabode, Oluwole and Nelson, Mark and Bathe, Oliver and Carroll, Peter R. and Chan, June M. and Disaia, Philip and Glenn, Pat and Kelley, Robin K. and Landen, Charles N. and Phillips, Joanna and Prados, Michael and Simko, Jeffry and Smith-McCune, Karen and VandenBerg, Scott and Roggin, Kevin and Fehrenbach, Ashley and Kendler, Ady and Sifri, Suzanne and Steele, Ruth and Jimeno, Antonio and Carey, Francis and Forgie, Ian and Mannelli, Massimo and Carney, Michael and Hernandez, Brenda and Campos, Benito and Herold-Mende, Christel and Jungk, Christin and Unterberg, Andreas and Von Deimling, Andreas and Bossler, Aaron and Galbraith, Joseph and Jacobus, Laura and Knudson, Michael and Knutson, Tina and Ma, Deqin and Milhem, Mohammed and Sigmund, Rita and Godwin, Andrew K. and Madan, Rashna and Rosenthal, Howard G. and Adebamowo, Clement and Adebamowo, Sally N. and Boussioutas, Alex and Beer, David and Giordano, Thomas and Mes-Masson, Anne-Marie and Saad, Fred and Bocklage, Therese and Landrum, Lisa and Mannel, Robert and Moore, Kathleen and Moxley, Katherine and Postier, Russel and Walker, Joan and Zuna, Rosemary and Feldman, Michael and Valdivieso, Federico and Dhir, Rajiv and Luketich, James and Pinero, Edna M. Mora and Quintero-Aguilo, Mario and Carlotti, Carlos Gilberto and Dos Santos, Jose Sebastião and Kemp, Rafael and Sankarankuty, Ajith and Tirapelli, Daniela and Catto, James and Agnew, Kathy and Swisher, Elizabeth and Creaney, Jenette and Robinson, Bruce and Shelley, Carl Simon and Godwin, Eryn M. and Kendall, Sara and Shipman, Cassaundra and Bradford, Carol and Carey, Thomas and Haddad, Andrea and Moyer, Jeffey and Peterson, Lisa and Prince, Mark and Rozek, Laura and Wolf, Gregory and Bowman, Rayleen and Fong, Kwun M. and Yang, Ian and Korst, Robert and Rathmell, W. Kimryn and Fantacone-Campbell, J. Leigh and Hooke, Jeffrey A. and Kovatich, Albert J. and Shriver, Craig D. and DiPersio, John and Drake, Bettina and Govindan, Ramaswamy and Heath, Sharon and Ley, Timothy and Van Tine, Brian and Westervelt, Peter and Rubin, Mark A. and Lee, Jung Il and Aredes, Natália D. and Mariamidze, Armaz},
  date = {2018-04},
  journaltitle = {Immunity},
  volume = {48},
  number = {4},
  pages = {812-830.e14},
  issn = {10747613},
  doi = {10.1016/j.immuni.2018.03.023},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S1074761318301213},
  urldate = {2023-08-18},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/2J8S7HKI/Thorsson et al. - 2018 - The Immune Landscape of Cancer.pdf}
}

@article{tickotskyMcPASTCRManuallyCurated2017,
  title = {{{McPAS-TCR}}: A Manually Curated Catalogue of Pathology-Associated {{T}} Cell Receptor Sequences},
  shorttitle = {{{McPAS-TCR}}},
  author = {Tickotsky, Nili and Sagiv, Tal and Prilusky, Jaime and Shifrut, Eric and Friedman, Nir},
  date = {2017-09-15},
  journaltitle = {Bioinformatics},
  volume = {33},
  number = {18},
  pages = {2924--2929},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btx286},
  url = {https://doi.org/10.1093/bioinformatics/btx286},
  abstract = {While growing numbers of T cell receptor (TCR) repertoires are being mapped by high-throughput sequencing, existing methods do not allow for computationally connecting a given TCR sequence to its target antigen, or relating it to a specific pathology. As an alternative, a manually-curated database can relate TCR sequences with their cognate antigens and associated pathologies based on published experimental data.We present McPAS-TCR, a manually curated database of TCR sequences associated with various pathologies and antigens based on published literature. Our database currently contains more than 5000 sequences of TCRs associated with various pathologic conditions (including pathogen infections, cancer and autoimmunity) and their respective antigens in humans and in mice. A web-based tool allows for searching the database based on different criteria, and for finding annotated sequences from the database in users’ data. The McPAS-TCR website assembles information from a large number of studies that is very hard to dissect otherwise. Initial analyses of the data provide interesting insights on pathology-associated TCR sequences.Free access at http://friedmanlab.weizmann.ac.il/McPAS-TCR/.},
  annotation = {153 citations (Crossref) [2022-09-16]},
  file = {/Users/Nasy/Zotero/storage/JRBBWBL6/Tickotsky et al. - 2017 - McPAS-TCR a manually curated catalogue of patholo.pdf;/Users/Nasy/Zotero/storage/NACVK8Y5/3803440.html}
}

@article{tongSETESequencebasedEnsemble2020,
  title = {{{SETE}}: {{Sequence-based Ensemble}} Learning Approach for {{TCR Epitope}} Binding Prediction},
  shorttitle = {{{SETE}}},
  author = {Tong, Yao and Wang, Jiayin and Zheng, Tian and Zhang, Xuanping and Xiao, Xiao and Zhu, Xiaoyan and Lai, Xin and Liu, Xiang},
  date = {2020-08-01},
  journaltitle = {Computational Biology and Chemistry},
  volume = {87},
  pages = {107281},
  issn = {1476-9271},
  doi = {10.1016/j.compbiolchem.2020.107281},
  url = {https://www.sciencedirect.com/science/article/pii/S1476927120303194},
  abstract = {Predicting the binding of T cell receptors (TCRs) to epitopes plays a vital role in the immunotherapy, because it guides the development of therapeutic vaccines and cancer treatments. Many prediction methods attempted to explain the relationship between TCR repertoires from different aspects such as the V(D)J gene locus and the biophysical features of amino acids molecules, but the extraction of these features is time consuming and the performance of these models are limited. Few studies have investigated how k-mers formed by adjacent amino acids in TCR sequences direct the epitope recognition, and the specific mechanism of TCR epitope binding is still unclear. Motivated by these, we presented SETE (Sequence-based Ensemble learning approach for TCR Epitope binding prediction), a novel model to predict the TCR epitope binding accurately. The model deconstructed the CDR3β sequence to short amino acid chains as features and learned the pattern of them between different TCR repertoires with gradient boosting decision tree algorithm. Experiments have demonstrated that SETE can be helpful in predicting the TCRs’ corresponding epitopes and it outperforms other state-of-the-art methods in predicting the epitope specificity of TCR on VDJdb data set. The source codes have been uploaded at https://github.com/wonanut/SETE for academic usage only.},
  langid = {english},
  keywords = {CDR3,Gradient boosting tree,Immunotherapy,TCR,VDJdb},
  annotation = {16 citations (Crossref) [2022-09-16]},
  file = {/Users/Nasy/Zotero/storage/TBLX9WUM/Tong et al. - 2020 - SETE Sequence-based Ensemble learning approach fo.pdf;/Users/Nasy/Zotero/storage/ZRQVFJ5H/S1476927120303194.html}
}

@online{TopologybasedNetworkTree,
  title = {A Topology-Based Network Tree for the Prediction of Protein–Protein Binding Affinity Changes Following Mutation | {{Nature Machine Intelligence}}},
  url = {https://www.nature.com/articles/s42256-020-0149-6},
  urldate = {2022-07-30},
  file = {/Users/Nasy/Zotero/storage/LR8EDARE/s42256-020-0149-6.html}
}

@online{touvronLLaMAOpenEfficient2023,
  title = {{{LLaMA}}: {{Open}} and {{Efficient Foundation Language Models}}},
  shorttitle = {{{LLaMA}}},
  author = {Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timothée and Rozière, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and Rodriguez, Aurelien and Joulin, Armand and Grave, Edouard and Lample, Guillaume},
  date = {2023-02-27},
  eprint = {2302.13971},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/2302.13971},
  urldate = {2023-05-19},
  abstract = {We introduce LLaMA, a collection of foundation language models ranging from 7B to 65B parameters. We train our models on trillions of tokens, and show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. In particular, LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with the best models, Chinchilla-70B and PaLM-540B. We release all our models to the research community.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computation and Language},
  file = {/Users/Nasy/Zotero/storage/GB8M4L2S/Touvron et al. - 2023 - LLaMA Open and Efficient Foundation Language Mode.pdf;/Users/Nasy/Zotero/storage/PFGQVMWP/2302.html}
}

@article{tranImmunogenicitySomaticMutations2015,
  title = {Immunogenicity of Somatic Mutations in Human Gastrointestinal Cancers},
  author = {Tran, Eric and Ahmadzadeh, Mojgan and Lu, Yong-Chen and Gros, Alena and Turcotte, Simon and Robbins, Paul F. and Gartner, Jared J. and Zheng, Zhili and Li, Yong F. and Ray, Satyajit and Wunderlich, John R. and Somerville, Robert P. and Rosenberg, Steven A.},
  date = {2015-12-11},
  journaltitle = {Science},
  volume = {350},
  number = {6266},
  eprint = {26516200},
  eprinttype = {pmid},
  pages = {1387--1390},
  issn = {1095-9203},
  doi = {10.1126/science.aad1253},
  abstract = {It is unknown whether the human immune system frequently mounts a T cell response against mutations expressed by common epithelial cancers. Using a next-generation sequencing approach combined with high-throughput immunologic screening, we demonstrated that tumor-infiltrating lymphocytes (TILs) from 9 out of 10 patients with metastatic gastrointestinal cancers contained CD4(+) and/or CD8(+) T cells that recognized one to three neo-epitopes derived from somatic mutations expressed by the patient's own tumor. There were no immunogenic epitopes shared between these patients. However, we identified in one patient a human leukocyte antigen-C*08:02-restricted T cell receptor from CD8(+) TILs that targeted the KRAS(G12D) hotspot driver mutation found in many human cancers. Thus, a high frequency of patients with common gastrointestinal cancers harbor immunogenic mutations that can potentially be exploited for the development of highly personalized immunotherapies.},
  langid = {english},
  pmcid = {PMC7445892},
  keywords = {Adult,CD8-Positive T-Lymphocytes,Cell Line Tumor,Female,Gastrointestinal Neoplasms,HLA-C Antigens,Humans,Immunodominant Epitopes,Immunotherapy,Lymphocytes Tumor-Infiltrating,Male,Middle Aged,Mutation,Precision Medicine,Proto-Oncogene Proteins,Proto-Oncogene Proteins p21(ras),ras Proteins,Receptors Antigen T-Cell},
  annotation = {474 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/RA26P579/Tran et al. - 2015 - Immunogenicity of somatic mutations in human gastr.pdf}
}

@unpublished{tsuyuzakiBiologicalSystemsHeterogeneous2017,
  title = {Biological {{Systems}} as {{Heterogeneous Information Networks}}: {{A Mini-review}} and {{Perspectives}}},
  shorttitle = {Biological {{Systems}} as {{Heterogeneous Information Networks}}},
  author = {Tsuyuzaki, Koki and Nikaido, Itoshi},
  date = {2017-12-23},
  eprint = {1712.08865},
  eprinttype = {arXiv},
  eprintclass = {cs, q-bio},
  doi = {10.48550/arXiv.1712.08865},
  url = {http://arxiv.org/abs/1712.08865},
  urldate = {2022-06-05},
  abstract = {In the real world, most objects and data have multiple types of attributes and inter-connections. Such data structures are named "Heterogeneous Information Networks" (HIN) and have been widely researched. Biological systems are also considered to be highly complicated HIN. In this work, we review various applications of HIN methods to biological and chemical data, discuss some advanced topics, and describe some future research directions.},
  keywords = {Computer Science - Information Retrieval,Quantitative Biology - Molecular Networks},
  file = {/Users/Nasy/Zotero/storage/5YPUIEW8/Tsuyuzaki and Nikaido - 2017 - Biological Systems as Heterogeneous Information Ne.pdf;/Users/Nasy/Zotero/storage/LSXB45BH/1712.html}
}

@article{tsuyuzakiBiologicalSystemsHeterogeneous2018,
  title = {Biological {{Systems}} as {{Heterogeneous Information Networks}}: {{A Mini-review}} and {{Perspectives}}},
  author = {Tsuyuzaki, Koki and Nikaido, Itoshi},
  date = {2018},
  journaltitle = {Los Angeles},
  pages = {8},
  abstract = {In the real world, most objects and data have multiple types of attributes and inter-connections. Such data structures are named “Heterogeneous Information Networks” (HIN) and have been widely researched.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/R3S4I8VK/Tsuyuzaki and Nikaido - 2018 - Biological Systems as Heterogeneous Information Ne.pdf}
}

@book{ungarAnalyticHyperbolicGeometry2005,
  title = {Analytic {{Hyperbolic Geometry}}: {{Mathematical Foundations}} and {{Applications}}},
  shorttitle = {Analytic {{Hyperbolic Geometry}}},
  author = {Ungar, Abraham A},
  date = {2005-09},
  publisher = {WORLD SCIENTIFIC},
  doi = {10.1142/5914},
  url = {http://www.worldscientific.com/worldscibooks/10.1142/5914},
  isbn = {978-981-256-457-3 978-981-270-327-9},
  langid = {english}
}

@book{ungarGyrovectorSpaceApproach2009,
  title = {A {{Gyrovector Space Approach}} to {{Hyperbolic Geometry}}},
  author = {Ungar, Abraham Albert},
  date = {2009},
  series = {Synthesis {{Lectures}} on {{Mathematics}} \& {{Statistics}}},
  publisher = {Springer International Publishing},
  location = {Cham},
  doi = {10.1007/978-3-031-02396-5},
  url = {https://link.springer.com/10.1007/978-3-031-02396-5},
  isbn = {978-3-031-01268-6 978-3-031-02396-5},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/EMV4HN9S/Ungar - 2009 - A Gyrovector Space Approach to Hyperbolic Geometry.pdf}
}

@inproceedings{vashishthCompositionbasedMultiRelationalGraph2019,
  title = {Composition-Based {{Multi-Relational Graph Convolutional Networks}}},
  author = {Vashishth, Shikhar and Sanyal, Soumya and Nitin, Vikram and Talukdar, Partha},
  date = {2019-09-25},
  url = {https://openreview.net/forum?id=BylA_C4tPr},
  urldate = {2022-05-30},
  abstract = {A Composition-based Graph Convolutional framework for multi-relational graphs.},
  eventtitle = {International {{Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/4MEJSFQA/Vashishth et al. - 2019 - Composition-based Multi-Relational Graph Convoluti.pdf;/Users/Nasy/Zotero/storage/Y9MD5GTA/forum.html}
}

@article{vashishthCOMPOSITIONBASEDMULTIRELATIONALGRAPH2020,
  title = {{{COMPOSITION-BASED MULTI-RELATIONAL GRAPH CONVOLUTIONAL NETWORKS}}},
  author = {Vashishth, Shikhar and Sanyal, Soumya and Nitin, Vikram and Talukdar, Partha},
  date = {2020},
  pages = {16},
  abstract = {Graph Convolutional Networks (GCNs) have recently been shown to be quite successful in modeling graph-structured data. However, the primary focus has been on handling simple undirected graphs. Multi-relational graphs are a more general and prevalent form of graphs where each edge has a label and direction associated with it. Most of the existing approaches to handle such graphs suffer from over-parameterization and are restricted to learning representations of nodes only. In this paper, we propose COMPGCN, a novel Graph Convolutional framework which jointly embeds both nodes and relations in a relational graph. COMPGCN leverages a variety of entity-relation composition operations from Knowledge Graph Embedding techniques and scales with the number of relations. It also generalizes several of the existing multi-relational GCN methods. We evaluate our proposed method on multiple tasks such as node classification, link prediction, and graph classification, and achieve demonstrably superior results. We make the source code of COMPGCN available to foster reproducible research.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/6UXZJT5V/Vashishth et al. - 2020 - COMPOSITION-BASED MULTI-RELATIONAL GRAPH CONVOLUTI.pdf}
}

@unpublished{vashishthCompositionbasedMultiRelationalGraph2020,
  title = {Composition-Based {{Multi-Relational Graph Convolutional Networks}}},
  author = {Vashishth, Shikhar and Sanyal, Soumya and Nitin, Vikram and Talukdar, Partha},
  date = {2020-01-18},
  eprint = {1911.03082},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  publisher = {arXiv},
  doi = {10.48550/arXiv.1911.03082},
  url = {http://arxiv.org/abs/1911.03082},
  urldate = {2022-05-30},
  abstract = {Graph Convolutional Networks (GCNs) have recently been shown to be quite successful in modeling graph-structured data. However, the primary focus has been on handling simple undirected graphs. Multi-relational graphs are a more general and prevalent form of graphs where each edge has a label and direction associated with it. Most of the existing approaches to handle such graphs suffer from over-parameterization and are restricted to learning representations of nodes only. In this paper, we propose CompGCN, a novel Graph Convolutional framework which jointly embeds both nodes and relations in a relational graph. CompGCN leverages a variety of entity-relation composition operations from Knowledge Graph Embedding techniques and scales with the number of relations. It also generalizes several of the existing multi-relational GCN methods. We evaluate our proposed method on multiple tasks such as node classification, link prediction, and graph classification, and achieve demonstrably superior results. We make the source code of CompGCN available to foster reproducible research.},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/67MAVCD5/Vashishth et al. - 2020 - Composition-based Multi-Relational Graph Convoluti.pdf;/Users/Nasy/Zotero/storage/MV6ERHMV/1911.html}
}

@unpublished{velickovicGraphAttentionNetworks2018,
  title = {Graph {{Attention Networks}}},
  author = {Veličković, Petar and Cucurull, Guillem and Casanova, Arantxa and Romero, Adriana and Liò, Pietro and Bengio, Yoshua},
  date = {2018-02-04},
  eprint = {1710.10903},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/1710.10903},
  urldate = {2022-03-11},
  abstract = {We present graph attention networks (GATs), novel neural network architectures that operate on graph-structured data, leveraging masked self-attentional layers to address the shortcomings of prior methods based on graph convolutions or their approximations. By stacking layers in which nodes are able to attend over their neighborhoods' features, we enable (implicitly) specifying different weights to different nodes in a neighborhood, without requiring any kind of costly matrix operation (such as inversion) or depending on knowing the graph structure upfront. In this way, we address several key challenges of spectral-based graph neural networks simultaneously, and make our model readily applicable to inductive as well as transductive problems. Our GAT models have achieved or matched state-of-the-art results across four established transductive and inductive graph benchmarks: the Cora, Citeseer and Pubmed citation network datasets, as well as a protein-protein interaction dataset (wherein test graphs remain unseen during training).},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Computer Science - Social and Information Networks,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/KJUJSYN3/Veličković et al. - 2018 - Graph Attention Networks.pdf;/Users/Nasy/Zotero/storage/KRQITNTG/1710.html}
}

@online{vyasSOAPImprovingStabilizing2024,
  title = {{{SOAP}}: {{Improving}} and {{Stabilizing Shampoo}} Using {{Adam}}},
  shorttitle = {{{SOAP}}},
  author = {Vyas, Nikhil and Morwani, Depen and Zhao, Rosie and Shapira, Itai and Brandfonbrener, David and Janson, Lucas and Kakade, Sham},
  date = {2024-09-17},
  eprint = {2409.11321},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.2409.11321},
  url = {http://arxiv.org/abs/2409.11321},
  urldate = {2024-11-08},
  abstract = {There is growing evidence of the effectiveness of Shampoo, a higher-order preconditioning method, over Adam in deep learning optimization tasks. However, Shampoo's drawbacks include additional hyperparameters and computational overhead when compared to Adam, which only updates running averages of first- and second-moment quantities. This work establishes a formal connection between Shampoo (implemented with the 1/2 power) and Adafactor -- a memory-efficient approximation of Adam -- showing that Shampoo is equivalent to running Adafactor in the eigenbasis of Shampoo's preconditioner. This insight leads to the design of a simpler and computationally efficient algorithm: \$\textbackslash textbf\{S\}\$hampo\$\textbackslash textbf\{O\}\$ with \$\textbackslash textbf\{A\}\$dam in the \$\textbackslash textbf\{P\}\$reconditioner's eigenbasis (SOAP). With regards to improving Shampoo's computational efficiency, the most straightforward approach would be to simply compute Shampoo's eigendecomposition less frequently. Unfortunately, as our empirical results show, this leads to performance degradation that worsens with this frequency. SOAP mitigates this degradation by continually updating the running average of the second moment, just as Adam does, but in the current (slowly changing) coordinate basis. Furthermore, since SOAP is equivalent to running Adam in a rotated space, it introduces only one additional hyperparameter (the preconditioning frequency) compared to Adam. We empirically evaluate SOAP on language model pre-training with 360m and 660m sized models. In the large batch regime, SOAP reduces the number of iterations by over 40\% and wall clock time by over 35\% compared to AdamW, with approximately 20\% improvements in both metrics compared to Shampoo. An implementation of SOAP is available at https://github.com/nikhilvyas/SOAP.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/CVK59YSN/Vyas et al. - 2024 - SOAP Improving and Stabilizing Shampoo using Adam.pdf;/Users/Nasy/Zotero/storage/AI2GAZFR/2409.html}
}

@inproceedings{wangBiGCNBinaryGraph2021,
  title = {Bi-{{GCN}}: {{Binary Graph Convolutional Network}}},
  shorttitle = {Bi-{{GCN}}},
  author = {Wang, Junfu and Wang, Yunhong and Yang, Zhen and Yang, Liang and Guo, Yuanfang},
  date = {2021},
  pages = {1561--1570},
  url = {https://openaccess.thecvf.com/content/CVPR2021/html/Wang_Bi-GCN_Binary_Graph_Convolutional_Network_CVPR_2021_paper.html},
  eventtitle = {Proceedings of the {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/2XLDUDRE/Wang et al. - 2021 - Bi-GCN Binary Graph Convolutional Network.pdf}
}

@inproceedings{wangDeepMultimodalFusion2020,
  title = {Deep {{Multimodal Fusion}} by {{Channel Exchanging}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Wang, Yikai and Huang, Wenbing and Sun, Fuchun and Xu, Tingyang and Rong, Yu and Huang, Junzhou},
  date = {2020},
  volume = {33},
  pages = {4835--4845},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper_files/paper/2020/hash/339a18def9898dd60a634b2ad8fbbd58-Abstract.html},
  abstract = {Deep multimodal fusion by using multiple sources of data for classification or regression has exhibited a clear advantage over the unimodal counterpart on various applications. Yet, current methods including aggregation-based and alignment-based fusion are still inadequate in balancing the trade-off between inter-modal fusion and intra-modal processing, incurring a bottleneck of performance improvement. To this end, this paper proposes Channel-Exchanging-Network (CEN), a parameter-free multimodal fusion framework that dynamically exchanges channels between sub-networks of different modalities. Specifically, the channel exchanging process is self-guided by individual channel importance that is measured by the magnitude of Batch-Normalization (BN) scaling factor during training. The validity of such exchanging process is also guaranteed by sharing convolutional filters yet keeping separate BN layers across modalities, which, as an add-on benefit, allows our multimodal architecture to be almost as compact as a unimodal network. Extensive experiments on semantic segmentation via RGB-D data and image translation through multi-domain input verify the effectiveness of our CEN compared to current state-of-the-art methods. Detailed ablation studies have also been carried out, which provably affirm the advantage of each component we propose. Our code is available at https://github.com/yikaiw/CEN.},
  keywords = {⛔ No DOI found},
  file = {/Users/Nasy/Zotero/storage/6CIRVZ3M/Wang et al. - 2020 - Deep Multimodal Fusion by Channel Exchanging.pdf}
}

@article{wangDynamicHeterogeneousInformation2022,
  title = {Dynamic {{Heterogeneous Information Network Embedding With Meta-Path Based Proximity}}},
  author = {Wang, Xiao and Lu, Yuanfu and Shi, Chuan and Wang, Ruijia and Cui, Peng and Mou, Shuai},
  date = {2022-03},
  journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
  volume = {34},
  number = {3},
  pages = {1117--1132},
  issn = {1558-2191},
  doi = {10.1109/TKDE.2020.2993870},
  abstract = {Heterogeneous information network (HIN) embedding aims at learning the low-dimensional representation of nodes while preserving structure and semantics in a HIN. Existing methods mainly focus on static networks, while a real HIN usually evolves over time with the addition (deletion) of multiple types of nodes and edges. Because even a tiny change can influence the whole structure and semantics, the conventional HIN embedding methods need to be retrained to get the updated embeddings, which is time-consuming and unrealistic. In this paper, we investigate the problem of dynamic HIN embedding and propose a novel Dynamic HIN Embedding model (DyHNE) with meta-path based proximity. Specifically, we introduce the meta-path based first- and second-order proximities to preserve structure and semantics in HINs. As the HIN evolves over time, we naturally capture changes with the perturbation of meta-path augmented adjacency matrices. Thereafter, we learn the node embeddings by solving generalized eigenvalue problem effectively and employ eigenvalue perturbation to derive the updated embeddings efficiently without retraining. Experiments show that DyHNE outperforms the state-of-the-arts in terms of effectiveness and efficiency.},
  eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}},
  keywords = {Dynamic heterogeneous information network,Eigenvalues and eigenfunctions,Matrix decomposition,network embedding,Neural networks,Perturbation methods,Semantics,social network analysis,Task analysis,Telecommunications},
  annotation = {12 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/U92PC42H/Wang et al. - 2022 - Dynamic Heterogeneous Information Network Embeddin.pdf;/Users/Nasy/Zotero/storage/YVRTDKHL/9091208.html}
}

@inproceedings{wangHeterogeneousGraphAttention2019,
  title = {Heterogeneous {{Graph Attention Network}}},
  booktitle = {The {{World Wide Web Conference}}},
  author = {Wang, Xiao and Ji, Houye and Shi, Chuan and Wang, Bai and Ye, Yanfang and Cui, Peng and Yu, Philip S},
  date = {2019-05-13},
  series = {{{WWW}} '19},
  pages = {2022--2032},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3308558.3313562},
  url = {https://doi.org/10.1145/3308558.3313562},
  abstract = {Graph neural network, as a powerful graph representation technique based on deep learning, has shown superior performance and attracted considerable research interest. However, it has not been fully considered in graph neural network for heterogeneous graph which contains different types of nodes and links. The heterogeneity and rich semantic information bring great challenges for designing a graph neural network for heterogeneous graph. Recently, one of the most exciting advancements in deep learning is the attention mechanism, whose great potential has been well demonstrated in various areas. In this paper, we first propose a novel heterogeneous graph neural network based on the hierarchical attention, including node-level and semantic-level attentions. Specifically, the node-level attention aims to learn the importance between a node and its meta-path based neighbors, while the semantic-level attention is able to learn the importance of different meta-paths. With the learned importance from both node-level and semantic-level attention, the importance of node and meta-path can be fully considered. Then the proposed model can generate node embedding by aggregating features from meta-path based neighbors in a hierarchical manner. Extensive experimental results on three real-world heterogeneous graphs not only show the superior performance of our proposed model over the state-of-the-arts, but also demonstrate its potentially good interpretability for graph analysis.},
  isbn = {978-1-4503-6674-8},
  keywords = {Graph Analysis,Neural Network,Social Network},
  annotation = {410 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/IW7IS679/Wang et al. - 2019 - Heterogeneous Graph Attention Network.pdf}
}

@inproceedings{wangHeterogeneousGraphMatching2019,
  title = {Heterogeneous {{Graph Matching Networks}} for {{Unknown Malware Detection}}},
  booktitle = {Proceedings of the {{Twenty-Eighth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Wang, Shen and Chen, Zhengzhang and Yu, Xiao and Li, Ding and Ni, Jingchao and Tang, Lu-An and Gui, Jiaping and Li, Zhichun and Chen, Haifeng and Yu, Philip S.},
  date = {2019-08},
  pages = {3762--3770},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Macao, China},
  doi = {10.24963/ijcai.2019/522},
  url = {https://www.ijcai.org/proceedings/2019/522},
  urldate = {2022-05-26},
  abstract = {Information systems have widely been the target of malware attacks. Traditional signature-based malicious program detection algorithms can only detect known malware and are prone to evasion techniques such as binary obfuscation, while behaviorbased approaches highly rely on the malware training samples and incur prohibitively high training cost. To address the limitations of existing techniques, we propose MatchGNet, a heterogeneous Graph Matching Network model to learn the graph representation and similarity metric simultaneously based on the invariant graph modeling of the program’s execution behaviors. We conduct a systematic evaluation of our model and show that it is accurate in detecting malicious program behavior and can help detect malware attacks with less false positives. MatchGNet outperforms the state-of-the-art algorithms in malware detection by generating 50\% less false positives while keeping zero false negatives.},
  eventtitle = {Twenty-{{Eighth International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-19}}\vphantom\{\}},
  isbn = {978-0-9992411-4-1},
  langid = {english},
  annotation = {23 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/44Q2UXEB/Wang et al. - 2019 - Heterogeneous Graph Matching Networks for Unknown .pdf}
}

@inproceedings{wangSelfConsistencyImprovesChain2023,
  title = {Self-{{Consistency Improves Chain}} of {{Thought Reasoning}} in {{Language Models}}},
  author = {Wang, Xuezhi and Wei, Jason and Schuurmans, Dale and Le, Quoc V. and Chi, Ed H. and Narang, Sharan and Chowdhery, Aakanksha and Zhou, Denny},
  date = {2023-02-01},
  url = {https://openreview.net/forum?id=1PL1NIMMrw},
  abstract = {Chain-of-thought prompting combined with pretrained large language models has achieved encouraging results on complex reasoning tasks. In this paper, we propose a new decoding strategy, self-consistency, to replace the naive greedy decoding used in chain-of-thought prompting. It first samples a diverse set of reasoning paths instead of only taking the greedy one, and then selects the most consistent answer by marginalizing out all possible reasoning paths. Self-consistency leverages the intuition that a complex reasoning problem typically admits multiple different ways of thinking leading to its unique correct answer. Our extensive empirical evaluation shows that self-consistency boosts the performance of chain-of-thought prompting with a striking margin on a range of popular arithmetic and commonsense reasoning benchmarks, including GSM8K (+17.9\%), SVAMP (+11.0\%), AQuA (+12.2\%), StrategyQA (+6.4\%) and ARC-challenge (+3.9\%).},
  eventtitle = {The {{Eleventh International Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/4358EJ8T/Wang et al. - 2023 - Self-Consistency Improves Chain of Thought Reasoni.pdf}
}

@article{wangSurveyHeterogeneousGraph2022,
  title = {A {{Survey}} on {{Heterogeneous Graph Embedding}}: {{Methods}}, {{Techniques}}, {{Applications}} and {{Sources}}},
  shorttitle = {A {{Survey}} on {{Heterogeneous Graph Embedding}}},
  author = {Wang, Xiao and Bo, Deyu and Shi, Chuan and Fan, Shaohua and Ye, Yanfang and Yu, Philip S.},
  date = {2022},
  journaltitle = {IEEE Transactions on Big Data},
  pages = {1--1},
  issn = {2332-7790},
  doi = {10.1109/TBDATA.2022.3177455},
  abstract = {Heterogeneous graphs (HGs) also known as heterogeneous information networks have become ubiquitous in real-world scenarios; therefore, HG embedding, which aim to learn representations in a lower-dimension space while preserving the heterogeneous structures and semantics for downstream tasks (e.g., node/graph classification, node clustering, link prediction), has drawn considerable attentions. In this survey, we perform a comprehensive review of the recent development on HG embedding methods and techniques. We first introduce the basic concepts of HG and discuss the unique challenges brought by the heterogeneity for HG embedding; and then we systemically survey and categorize the state-of-the-art HG embedding methods based on the information they used in the learning process. In particular, for each representative HG embedding method, we provide detailed introduction and further analyze its pros and cons; meanwhile, we explore the transformativeness and applicability of different types of HG embedding methods in the real-world industrial environments. We further present several widely deployed systems that have demonstrated the success of HG embedding techniques in resolving real-world application problems. To facilitate future research and applications in this area, we also summarize the open-source code, existing graph learning platforms and benchmark datasets. Finally, we explore the additional issues and challenges of HG embedding and forecast the future research directions in this field.},
  eventtitle = {{{IEEE Transactions}} on {{Big Data}}},
  langid = {english},
  keywords = {Benchmark testing,Big Data,deep learning,Fuses,graph embedding,heterogeneous graph,machine learning,Mercury (metals),Open source software,Semantics,Telecommunications},
  annotation = {0 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/68ZHWBJ2/Wang et al. - 2020 - A Survey on Heterogeneous Graph Embedding Methods.pdf;/Users/Nasy/Zotero/storage/GM5Z9WY2/9780569.html;/Users/Nasy/Zotero/storage/SXZZH4KE/2011.html}
}

@article{wangTopologybasedNetworkTree2020,
  title = {A Topology-Based Network Tree for the Prediction of Protein–Protein Binding Affinity Changes Following Mutation},
  author = {Wang, Menglun and Cang, Zixuan and Wei, Guo-Wei},
  date = {2020-02},
  journaltitle = {Nat Mach Intell},
  volume = {2},
  number = {2},
  pages = {116--123},
  issn = {2522-5839},
  doi = {10.1038/s42256-020-0149-6},
  url = {http://www.nature.com/articles/s42256-020-0149-6},
  urldate = {2022-07-30},
  langid = {english},
  annotation = {39 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/92C3XFVP/Wang et al. - 2020 - A topology-based network tree for the prediction o.pdf}
}

@inproceedings{weiChainofThoughtPromptingElicits2022,
  title = {Chain-of-{{Thought Prompting Elicits Reasoning}} in {{Large Language Models}}},
  author = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Ichter, Brian and Xia, Fei and Chi, Ed H. and Le, Quoc V. and Zhou, Denny},
  date = {2022-10-31},
  url = {https://openreview.net/forum?id=_VjQlMeSB_J},
  abstract = {We explore how generating a chain of thought---a series of intermediate reasoning steps---significantly improves the ability of large language models to perform complex reasoning. In particular, we show how such reasoning abilities emerge naturally in sufficiently large language models via a simple method called chain of thought prompting, where a few chain of thought demonstrations are provided as exemplars in prompting. Experiments on three large language models show that chain of thought prompting improves performance on a range of arithmetic, commonsense, and symbolic reasoning tasks. The empirical gains can be striking. For instance, prompting a 540B-parameter language model with just eight chain of thought exemplars achieves state of the art accuracy on the GSM8K benchmark of math word problems, surpassing even finetuned GPT-3 with a verifier.},
  eventtitle = {Advances in {{Neural Information Processing Systems}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/3LIC3UL6/Wei et al. - 2022 - Chain-of-Thought Prompting Elicits Reasoning in La.pdf}
}

@inproceedings{weiFinetunedLanguageModels2022,
  title = {Finetuned {{Language Models}} Are {{Zero-Shot Learners}}},
  author = {Wei, Jason and Bosma, Maarten and Zhao, Vincent and Guu, Kelvin and Yu, Adams Wei and Lester, Brian and Du, Nan and Dai, Andrew M. and Le, Quoc V.},
  date = {2022-01-28},
  url = {https://openreview.net/forum?id=gEZrGCozdqR},
  abstract = {This paper explores a simple method for improving the zero-shot learning abilities of language models. We show that instruction tuning—finetuning language models on a collection of datasets described via instructions—substantially improves zero-shot performance on unseen tasks. We take a 137B parameter pretrained language model and instruction tune it on over 60 NLP datasets verbalized via natural language instruction templates. We evaluate this instruction-tuned model, which we call FLAN, on unseen task types. FLAN substantially improves the performance of its unmodified counterpart and surpasses zero-shot 175B GPT-3 on 20 of 25 datasets that we evaluate. FLAN even outperforms few-shot GPT-3 by a large margin on ANLI, RTE, BoolQ, AI2-ARC, OpenbookQA, and StoryCloze. Ablation studies reveal that number of finetuning datasets, model scale, and natural language instructions are key to the success of instruction tuning.},
  eventtitle = {International {{Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/M3PSAW4C/Wei et al. - 2022 - Finetuned Language Models are Zero-Shot Learners.pdf}
}

@inproceedings{wuGraphConvolutionalNetworks2019,
  title = {Graph {{Convolutional Networks}} on {{User Mobility Heterogeneous Graphs}} for {{Social Relationship Inference}}},
  booktitle = {Proceedings of the {{Twenty-Eighth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Wu, Yongji and Lian, Defu and Jin, Shuowei and Chen, Enhong},
  date = {2019-08},
  pages = {3898--3904},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Macao, China},
  doi = {10.24963/ijcai.2019/541},
  url = {https://www.ijcai.org/proceedings/2019/541},
  urldate = {2022-05-26},
  abstract = {Inferring social relations from user trajectory data is of great value in real-world applications such as friend recommendation and ride-sharing. Most existing methods predict relationship based on a pairwise approach using some hand-crafted features or rely on a simple skip-gram based model to learn embeddings on graphs. Using hand-crafted features often fails to capture the complex dynamics in human social relations, while the graph embedding based methods only use random walks to propagate information and cannot incorporate external semantic data provided. We propose a novel model that utilizes Graph Convolutional Networks (GCNs) to learn user embeddings on the User Mobility Heterogeneous Graph in an unsupervised manner. This model is capable of propagating relation layer-wisely as well as combining both the rich structural information in the heterogeneous graph and predictive node features provided. Our method can also be extended to a semi-supervised setting if a part of the social network is available. The evaluation on three real-world datasets demonstrates that our method outperforms the state-ofthe-art approaches.},
  eventtitle = {Twenty-{{Eighth International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-19}}\vphantom\{\}},
  isbn = {978-0-9992411-4-1},
  langid = {english},
  annotation = {11 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/CSBQ4HYG/Wu et al. - 2019 - Graph Convolutional Networks on User Mobility Hete.pdf}
}

@inproceedings{wuRelationAwareEntityAlignment2019,
  title = {Relation-{{Aware Entity Alignment}} for {{Heterogeneous Knowledge Graphs}}},
  booktitle = {Proceedings of the {{Twenty-Eighth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Wu, Yuting and Liu, Xiao and Feng, Yansong and Wang, Zheng and Yan, Rui and Zhao, Dongyan},
  date = {2019-08},
  pages = {5278--5284},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Macao, China},
  doi = {10.24963/ijcai.2019/733},
  url = {https://www.ijcai.org/proceedings/2019/733},
  urldate = {2022-05-26},
  abstract = {Entity alignment is the task of linking entities with the same real-world identity from different knowledge graphs (KGs), which has been recently dominated by embedding-based methods. Such approaches work by learning KG representations so that entity alignment can be performed by measuring the similarities between entity embeddings. While promising, prior works in the field often fail to properly capture complex relation information that commonly exists in multi-relational KGs, leaving much room for improvement. In this paper, we propose a novel Relation-aware Dual-Graph Convolutional Network (RDGCN) to incorporate relation information via attentive interactions between the knowledge graph and its dual relation counterpart, and further capture neighboring structures to learn better entity representations. Experiments on three real-world cross-lingual datasets show that our approach delivers better and more robust results over the state-of-the-art alignment methods by learning better KG representations.},
  eventtitle = {Twenty-{{Eighth International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-19}}\vphantom\{\}},
  isbn = {978-0-9992411-4-1},
  langid = {english},
  annotation = {47 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/S3AASVVV/Wu et al. - 2019 - Relation-Aware Entity Alignment for Heterogeneous .pdf}
}

@unpublished{wuSamplingMattersDeep2018,
  title = {Sampling {{Matters}} in {{Deep Embedding Learning}}},
  author = {Wu, Chao-Yuan and Manmatha, R. and Smola, Alexander J. and Krähenbühl, Philipp},
  date = {2018-01-16},
  eprint = {1706.07567},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/1706.07567},
  urldate = {2022-05-10},
  abstract = {Deep embeddings answer one simple question: How similar are two images? Learning these embeddings is the bedrock of verification, zero-shot learning, and visual search. The most prominent approaches optimize a deep convolutional network with a suitable loss function, such as contrastive loss or triplet loss. While a rich line of work focuses solely on the loss functions, we show in this paper that selecting training examples plays an equally important role. We propose distance weighted sampling, which selects more informative and stable examples than traditional approaches. In addition, we show that a simple margin based loss is sufficient to outperform all other loss functions. We evaluate our approach on the Stanford Online Products, CAR196, and the CUB200-2011 datasets for image retrieval and clustering, and on the LFW dataset for face verification. Our method achieves state-of-the-art performance on all of them.},
  keywords = {Computer Science - Computer Vision and Pattern Recognition},
  file = {/Users/Nasy/Zotero/storage/L4GBU4VR/Wu et al. - 2018 - Sampling Matters in Deep Embedding Learning.pdf;/Users/Nasy/Zotero/storage/37U5NIAK/1706.html}
}

@online{wuTCRBERTLearningGrammar2021,
  title = {{{TCR-BERT}}: Learning the Grammar of {{T-cell}} Receptors for Flexible Antigen-Xbinding Analyses},
  shorttitle = {{{TCR-BERT}}},
  author = {Wu, Kevin and Yost, Kathryn E. and Daniel, Bence and Belk, Julia A. and Xia, Yu and Egawa, Takeshi and Satpathy, Ansuman and Chang, Howard Y. and Zou, James},
  date = {2021-11-20},
  eprinttype = {bioRxiv},
  eprintclass = {New Results},
  pages = {2021.11.18.469186},
  doi = {10.1101/2021.11.18.469186},
  url = {https://www.biorxiv.org/content/10.1101/2021.11.18.469186v1},
  urldate = {2022-08-19},
  abstract = {The T-cell receptor (TCR) allows T-cells to recognize and respond to antigens presented by infected and diseased cells. However, due to TCRs’ staggering diversity and the complex binding dynamics underlying TCR antigen recognition, it is challenging to predict which antigens a given TCR may bind to. Here, we present TCR-BERT, a deep learning model that applies self-supervised transfer learning to this problem. TCR-BERT leverages unlabeled TCR sequences to learn a general, versatile representation of TCR sequences, enabling numerous downstream applications. We demonstrate that TCR-BERT can be used to build state-of-the-art TCR-antigen binding predictors with improved generalizability compared to prior methods. TCR-BERT simultaneously facilitates clustering sequences likely to share antigen specificities. It also facilitates computational approaches to challenging, unsolved problems such as designing novel TCR sequences with engineered binding affinities. Importantly, TCR-BERT enables all these advances by focusing on residues with known biological significance. TCR-BERT can be a useful tool for T-cell scientists, enabling greater understanding and more diverse applications, and provides a conceptual framework for leveraging unlabeled data to improve machine learning on biological sequences.},
  langid = {english},
  pubstate = {prepublished},
  annotation = {1 citations (Crossref) [2022-08-19]},
  file = {/Users/Nasy/Zotero/storage/77B3YAK9/Wu et al. - 2021 - TCR-BERT learning the grammar of T-cell receptors.pdf;/Users/Nasy/Zotero/storage/BLQN25U9/2021.11.18.html}
}

@inproceedings{wuUserasGraphUserModeling2021,
  title = {User-as-{{Graph}}: {{User Modeling}} with {{Heterogeneous Graph Pooling}} for {{News Recommendation}}},
  shorttitle = {User-as-{{Graph}}},
  booktitle = {Proceedings of the {{Thirtieth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Wu, Chuhan and Wu, Fangzhao and Huang, Yongfeng and Xie, Xing},
  date = {2021-08},
  pages = {1624--1630},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Montreal, Canada},
  doi = {10.24963/ijcai.2021/224},
  url = {https://www.ijcai.org/proceedings/2021/224},
  urldate = {2022-05-26},
  abstract = {Accurate user modeling is critical for news recommendation. Existing news recommendation methods usually model users’ interest from their behaviors via sequential or attentive models. However, they cannot model the rich relatedness between user behaviors, which can provide useful contexts of these behaviors for user interest modeling. In this paper, we propose a novel user modeling approach for news recommendation, which models each user as a personalized heterogeneous graph built from user behaviors to better capture the finegrained behavior relatedness. In addition, in order to learn user interest embedding from the personalized heterogeneous graph, we propose a novel heterogeneous graph pooling method, which can summarize both node features and graph topology, and be aware of the varied characteristics of different types of nodes. Experiments on large-scale benchmark dataset show the proposed methods can effectively improve the performance of user modeling for news recommendation.},
  eventtitle = {Thirtieth {{International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-21}}\vphantom\{\}},
  isbn = {978-0-9992411-9-6},
  langid = {english},
  annotation = {3 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/CL9EPXHI/Wu et al. - 2021 - User-as-Graph User Modeling with Heterogeneous Gr.pdf}
}

@article{xiaImprovedDeepLearning2021,
  title = {Improved Deep Learning Techniques in Gravitational-Wave Data Analysis},
  author = {Xia, Heming and Shao, Lijing and Zhao, Junjie and Cao, Zhoujian},
  date = {2021-01-21},
  journaltitle = {Phys. Rev. D},
  volume = {103},
  number = {2},
  pages = {024040},
  publisher = {American Physical Society},
  doi = {10.1103/PhysRevD.103.024040},
  url = {https://link.aps.org/doi/10.1103/PhysRevD.103.024040},
  urldate = {2021-03-23},
  abstract = {In recent years, convolutional neural network (CNN) and other deep learning models have been gradually introduced into the area of gravitational-wave (GW) data processing. Compared with the traditional matched-filtering techniques, CNN has significant advantages in efficiency in GW signal detection tasks. In addition, matched-filtering techniques are based on the template bank of the existing theoretical waveform, which makes it difficult to find GW signals beyond theoretical expectation. In this paper, based on the task of GW detection of binary black holes, we introduce the optimization techniques of deep learning, such as batch normalization and dropout, to CNN models. Detailed studies of model performance are carried out. Through this study, we recommend to use batch normalization and dropout techniques in CNN models in GW signal detection tasks. Furthermore, we investigate the generalization ability of CNN models on different parameter ranges of GW signals. We point out that CNN models are robust to the variation of the parameter range of the GW waveform. This is a major advantage of deep learning models over matched-filtering techniques.},
  issue = {2},
  annotation = {7 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/PVWQTQHZ/Xia et al. - 2021 - Improved deep learning techniques in gravitational.pdf;/Users/Nasy/Zotero/storage/WHJLCETQ/PhysRevD.103.html}
}

@inproceedings{xieApplyingMachineLearning2019,
  title = {Applying {{Machine Learning}} to {{Understand Write Performance}} of {{Large-scale Parallel Filesystems}}},
  booktitle = {2019 {{IEEE}}/{{ACM Fourth International Parallel Data Systems Workshop}} ({{PDSW}})},
  author = {Xie, Bing and Tan, Zilong and Carns, Philip and Chase, Jeff and Harms, Kevin and Lofstead, Jay and Oral, Sarp and Vazhkudai, Sudharshan S. and Wang, Feiyi},
  date = {2019-11},
  pages = {30--39},
  doi = {10.1109/PDSW49588.2019.00008},
  abstract = {In high-performance computing (HPC), I/O performance prediction offers the potential to improve the efficiency of scientific computing. In particular, accurate prediction can make runtime estimates more precise, guide users toward optimal checkpoint strategies, and better inform facility provisioning and scheduling policies. HPC I/O performance is notoriously difficult to predict and model, however, in large part because of inherent variability and a lack of transparency in the behaviors of constituent storage system components. In this work we seek to advance the state of the art in HPC I/O performance prediction by (1) modeling the mean performance to address high variability, (2) deriving model features from write patterns, system architecture and system configurations, and (3) employing Lasso regression model to improve model accuracy. We demonstrate the efficacy of our approach by applying it to a crucial subset of common HPC I/O motifs, namely, file-per-process checkpoint write workloads. We conduct experiments on two distinct production HPC platforms - Titan at the Oak Ridge Leadership Computing Facility and Cetus at the Argonne Leadership Computing Facility - to train and evaluate our models. We find that we can attain ≤ 30\% relative error for 92.79\% and 99.64\% of the samples in our test set on these platforms, respectively.},
  eventtitle = {2019 {{IEEE}}/{{ACM Fourth International Parallel Data Systems Workshop}} ({{PDSW}})},
  keywords = {Large-scale parallel filesystem,machine learning,production supercomputer,write performance},
  annotation = {4 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/GRQDB2QR/8955192.html}
}

@inproceedings{xuanHardNegativeExamples2020,
  title = {Hard {{Negative Examples}} Are {{Hard}}, but {{Useful}}},
  booktitle = {Computer {{Vision}} – {{ECCV}} 2020},
  author = {Xuan, Hong and Stylianou, Abby and Liu, Xiaotong and Pless, Robert},
  editor = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael},
  date = {2020},
  series = {Lecture {{Notes}} in {{Computer Science}}},
  pages = {126--142},
  publisher = {Springer International Publishing},
  location = {Cham},
  doi = {10.1007/978-3-030-58568-6_8},
  abstract = {Triplet loss is an extremely common approach to distance metric learning. Representations of images from the same class are optimized to be mapped closer together in an embedding space than representations of images from different classes. Much work on triplet losses focuses on selecting the most useful triplets of images to consider, with strategies that select dissimilar examples from the same class or similar examples from different classes. The consensus of previous research is that optimizing with the hardest negative examples leads to bad training behavior. That’s a problem – these hardest negatives are literally the cases where the distance metric fails to capture semantic similarity. In this paper, we characterize the space of triplets and derive why hard negatives make triplet loss training fail. We offer a simple fix to the loss function and show that, with this fix, optimizing with hard negative examples becomes feasible. This leads to more generalizable features, and image retrieval results that outperform state of the art for datasets with high intra-class variance. Code is available at: https://github.com/littleredxh/HardNegative.git},
  isbn = {978-3-030-58568-6},
  langid = {english},
  keywords = {Deep metric learning,Hard negative,Triplet loss},
  annotation = {6 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/P4Z42YHR/Xuan et al. - 2020 - Hard Negative Examples are Hard, but Useful.pdf}
}

@inproceedings{xueModelingDynamicHeterogeneous2021,
  title = {Modeling {{Dynamic Heterogeneous Network}} for {{Link Prediction Using Hierarchical Attention}} with {{Temporal RNN}}},
  booktitle = {Machine {{Learning}} and {{Knowledge Discovery}} in {{Databases}}},
  author = {Xue, Hansheng and Yang, Luwei and Jiang, Wen and Wei, Yi and Hu, Yi and Lin, Yu},
  editor = {Hutter, Frank and Kersting, Kristian and Lijffijt, Jefrey and Valera, Isabel},
  date = {2021},
  series = {Lecture {{Notes}} in {{Computer Science}}},
  pages = {282--298},
  publisher = {Springer International Publishing},
  location = {Cham},
  doi = {10.1007/978-3-030-67658-2_17},
  abstract = {Network embedding aims to learn low-dimensional representations of nodes while capturing structure information of networks. It has achieved great success on many tasks of network analysis such as link prediction and node classification. Most of existing network embedding algorithms focus on how to learn static homogeneous networks effectively. However, networks in the real world are more complex, e.g.., networks may consist of several types of nodes and edges (called heterogeneous information) and may vary over time in terms of dynamic nodes and edges (called evolutionary patterns). Limited work has been done for network embedding of dynamic heterogeneous networks as it is challenging to learn both evolutionary and heterogeneous information simultaneously. In this paper, we propose a novel dynamic heterogeneous network embedding method, termed as DyHATR, which uses hierarchical attention to learn heterogeneous information and incorporates recurrent neural networks with temporal attention to capture evolutionary patterns. We benchmark our method on four real-world datasets for the task of link prediction. Experimental results show that DyHATR significantly outperforms several state-of-the-art baselines.},
  isbn = {978-3-030-67658-2},
  langid = {english},
  keywords = {Dynamic heterogeneous network,Hierarchical attention,Recurrent neural network,Temporal self-attention},
  annotation = {3 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/KBV6V69Z/Xue et al. - 2021 - Modeling Dynamic Heterogeneous Network for Link Pr.pdf}
}

@inproceedings{xuGeminiNovelUniversal2020,
  title = {Gemini: {{A Novel}} and {{Universal Heterogeneous Graph Information Fusing Framework}} for {{Online Recommendations}}},
  shorttitle = {Gemini},
  booktitle = {Proceedings of the 26th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Xu, Jixing and Zhu, Zhenlong and Zhao, Jianxin and Liu, Xuanye and Shan, Minghui and Guo, Jiecheng},
  date = {2020-08-23},
  series = {{{KDD}} '20},
  pages = {3356--3365},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3394486.3403388},
  url = {https://doi.org/10.1145/3394486.3403388},
  urldate = {2022-06-05},
  abstract = {Recently, network embedding has been successfully used in recommendation systems. Researchers have made efforts to utilize additional auxiliary information (e.g., social relations of users) to improve performance. However, such auxiliary information lacks compatibility for all recommendation scenarios, thus it is difficult to apply in some industrial scenarios where generality is required. Moreover, the heterogeneous nature between users and items aggravates the difficulty in network information fusion. Many works tried to transform user-item heterogeneous network to two homogeneous graphs (i.e., user-user and item-item), and then fuse information separately. This may limit the representation power of learned embedding due to ignoring the adjacent relationship in the original graph. In addition, the sparsity of user-item interactions is an urgent problem need to be solved. To solve the above problems, we propose a universal and effective framework named Gemini, which only relies on the common interaction logs, avoiding the dependence on auxiliary information and ensuring a better generality. For the purpose of keeping original adjacent relationship, Gemini transforms the original user-item heterogeneous graph into two semi homogeneous graphs from the perspective of users and items respectively. The transformed graphs consist of two types of nodes: network nodes coming from homogeneous nodes and attribute nodes coming from heterogeneous node. Then, the node representation is learned in a homogeneous way, with considering edge embedding at the same time. Simultaneously, the interaction sparsity problem is solved to some extent as the transformed graphs contain the original second-order neighbors. For training efficiently, we also propose an iterative training algorithm to reduce computational complexity. Experimental results on the five datasets and online A/B tests in recommendations of DiDiChuXing show that Gemini outperforms state-of-the-art algorithms.},
  isbn = {978-1-4503-7998-4},
  keywords = {heterogeneous graph,network embedding,nosource,recommendation},
  annotation = {5 citations (Crossref) [2022-08-03]}
}

@inproceedings{xuGeminiNovelUniversal2020a,
  title = {Gemini: {{A Novel}} and {{Universal Heterogeneous Graph Information Fusing Framework}} for {{Online Recommendations}}},
  shorttitle = {Gemini},
  booktitle = {Proceedings of the 26th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Xu, Jixing and Zhu, Zhenlong and Zhao, Jianxin and Liu, Xuanye and Shan, Minghui and Guo, Jiecheng},
  date = {2020-08-23},
  pages = {3356--3365},
  publisher = {ACM},
  location = {Virtual Event CA USA},
  doi = {10.1145/3394486.3403388},
  url = {https://dl.acm.org/doi/10.1145/3394486.3403388},
  urldate = {2022-06-05},
  abstract = {Recently, network embedding has been successfully used in recommendation systems. Researchers have made efforts to utilize additional auxiliary information (e.g., social relations of users) to improve performance. However, such auxiliary information lacks compatibility for all recommendation scenarios, thus it is difficult to apply in some industrial scenarios where generality is required. Moreover, the heterogeneous nature between users and items aggravates the difficulty in network information fusion. Many works tried to transform user-item heterogeneous network to two homogeneous graphs (i.e., user-user and item-item), and then fuse information separately. This may limit the representation power of learned embedding due to ignoring the adjacent relationship in the original graph. In addition, the sparsity of user-item interactions is an urgent problem need to be solved.},
  eventtitle = {{{KDD}} '20: {{The}} 26th {{ACM SIGKDD Conference}} on {{Knowledge Discovery}} and {{Data Mining}}},
  isbn = {978-1-4503-7998-4},
  langid = {english},
  annotation = {5 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/5WEXTWU7/Xu et al. - 2020 - Gemini A Novel and Universal Heterogeneous Graph .pdf}
}

@inproceedings{xuLearningSharedVertex2019,
  title = {Learning {{Shared Vertex Representation}} in {{Heterogeneous Graphs}} with {{Convolutional Networks}} for {{Recommendation}}},
  booktitle = {Proceedings of the {{Twenty-Eighth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Xu, Yanan and Zhu, Yanmin and Shen, Yanyan and Yu, Jiadi},
  date = {2019-08},
  pages = {4620--4626},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Macao, China},
  doi = {10.24963/ijcai.2019/642},
  url = {https://www.ijcai.org/proceedings/2019/642},
  urldate = {2022-05-26},
  abstract = {Collaborative Filtering (CF) is among the most successful techniques in recommendation tasks. Recent works have shown a boost of performance of CF when introducing the pairwise relationships between users and items or among items (users) using interaction data. However, these works usually only utilize one kind of information, i.e., user preference in a user-item interaction matrix or item dependency in interaction sequences which can limit the recommendation performance. In this paper, we propose to mine three kinds of information (user preference, item dependency, and user similarity on behaviors) by converting interaction sequence data into multiple graphs (i.e., a user-item graph, an item-item graph, and a user-subseq graph). We design a novel graph convolutional network (PGCN) to learn shared representations of users and items with the three heterogeneous graphs. In our approach, a neighbor pooling and a convolution operation are designed to aggregate features of neighbors. Extensive experiments on two real-world datasets demonstrate that our graph convolution approaches outperform various competitive methods in terms of two metrics, and the heterogeneous graphs are proved effective for improving recommendation performance.},
  eventtitle = {Twenty-{{Eighth International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-19}}\vphantom\{\}},
  isbn = {978-0-9992411-4-1},
  langid = {english},
  annotation = {7 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/R9CRI7KC/Xu et al. - 2019 - Learning Shared Vertex Representation in Heterogen.pdf}
}

@unpublished{xuRepresentationLearningGraphs2018,
  title = {Representation {{Learning}} on {{Graphs}} with {{Jumping Knowledge Networks}}},
  author = {Xu, Keyulu and Li, Chengtao and Tian, Yonglong and Sonobe, Tomohiro and Kawarabayashi, Ken-ichi and Jegelka, Stefanie},
  date = {2018-06-25},
  eprint = {1806.03536},
  eprinttype = {arXiv},
  eprintclass = {cs, stat},
  url = {http://arxiv.org/abs/1806.03536},
  urldate = {2022-03-11},
  abstract = {Recent deep learning approaches for representation learning on graphs follow a neighborhood aggregation procedure. We analyze some important properties of these models, and propose a strategy to overcome those. In particular, the range of "neighboring" nodes that a node's representation draws from strongly depends on the graph structure, analogous to the spread of a random walk. To adapt to local neighborhood properties and tasks, we explore an architecture -- jumping knowledge (JK) networks -- that flexibly leverages, for each node, different neighborhood ranges to enable better structure-aware representation. In a number of experiments on social, bioinformatics and citation networks, we demonstrate that our model achieves state-of-the-art performance. Furthermore, combining the JK framework with models like Graph Convolutional Networks, GraphSAGE and Graph Attention Networks consistently improves those models' performance.},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/4MWZQ247/Xu et al. - 2018 - Representation Learning on Graphs with Jumping Kno.pdf;/Users/Nasy/Zotero/storage/RUK66THP/1806.html}
}

@inproceedings{yangHeterogeneousGraphInformation2021,
  title = {Heterogeneous {{Graph Information Bottleneck}}},
  booktitle = {Proceedings of the {{Thirtieth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Yang, Liang and Wu, Fan and Zheng, Zichen and Niu, Bingxin and Gu, Junhua and Wang, Chuan and Cao, Xiaochun and Guo, Yuanfang},
  date = {2021-08},
  pages = {1638--1645},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Montreal, Canada},
  doi = {10.24963/ijcai.2021/226},
  url = {https://www.ijcai.org/proceedings/2021/226},
  urldate = {2022-05-26},
  abstract = {Most attempts on extending Graph Neural Networks (GNNs) to Heterogeneous Information Networks (HINs) implicitly take the direct assumption that the multiple homogeneous attributed networks induced by different meta-paths are complementary. The doubts about the hypothesis of complementary motivate an alternative assumption of consensus. That is, the aggregated node attributes shared by multiple homogeneous attributed networks are essential for node representations, while the specific ones in each homogeneous attributed network should be discarded. In this paper, a novel Heterogeneous Graph Information Bottleneck (HGIB) is proposed to implement the consensus hypothesis in an unsupervised manner. To this end, information bottleneck (IB) is extended to unsupervised representation learning by leveraging self-supervision strategy. Specifically, HGIB simultaneously maximizes the mutual information between one homogeneous network and the representation learned from another homogeneous network, while minimizes the mutual information between the specific information contained in one homogeneous network and the representation learned from this homogeneous network. Model analysis reveals that the two extreme cases of HGIB correspond to the supervised heterogeneous GNN and the infomax on homogeneous graph, respectively. Extensive experiments on real datasets demonstrate that the consensus-based unsupervised HGIB significantly outperforms most semi-supervised SOTA methods based on complementary assumption.},
  eventtitle = {Thirtieth {{International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-21}}\vphantom\{\}},
  isbn = {978-0-9992411-9-6},
  langid = {english},
  annotation = {0 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/ZRNIS9AF/Yang et al. - 2021 - Heterogeneous Graph Information Bottleneck.pdf}
}

@online{yangHyperbolicGraphNeural2022,
  title = {Hyperbolic {{Graph Neural Networks}}: {{A Review}} of {{Methods}} and {{Applications}}},
  shorttitle = {Hyperbolic {{Graph Neural Networks}}},
  author = {Yang, Menglin and Zhou, Min and Li, Zhihao and Liu, Jiahong and Pan, Lujia and Xiong, Hui and King, Irwin},
  date = {2022-02-28},
  eprint = {2202.13852},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2202.13852},
  url = {http://arxiv.org/abs/2202.13852},
  urldate = {2023-03-07},
  abstract = {Graph neural networks generalize conventional neural networks to graph-structured data and have received widespread attention due to their impressive representation ability. In spite of the remarkable achievements, the performance of Euclidean models in graph-related learning is still bounded and limited by the representation ability of Euclidean geometry, especially for datasets with highly non-Euclidean latent anatomy. Recently, hyperbolic space has gained increasing popularity in processing graph data with tree-like structure and power-law distribution, owing to its exponential growth property. In this survey, we comprehensively revisit the technical details of the current hyperbolic graph neural networks, unifying them into a general framework and summarizing the variants of each component. More importantly, we present various HGNN-related applications. Last, we also identify several challenges, which potentially serve as guidelines for further flourishing the achievements of graph learning in hyperbolic spaces.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/GGNCQF3T/Yang et al. - 2022 - Hyperbolic Graph Neural Networks A Review of Meth.pdf;/Users/Nasy/Zotero/storage/D6BHP9EJ/2202.html}
}

@online{yangVisionLanguagePreTrainingTriple2022,
  title = {Vision-{{Language Pre-Training}} with {{Triple Contrastive Learning}}},
  author = {Yang, Jinyu and Duan, Jiali and Tran, Son and Xu, Yi and Chanda, Sampath and Chen, Liqun and Zeng, Belinda and Chilimbi, Trishul and Huang, Junzhou},
  date = {2022-03-28},
  eprint = {2202.10401},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2202.10401},
  url = {http://arxiv.org/abs/2202.10401},
  abstract = {Vision-language representation learning largely benefits from image-text alignment through contrastive losses (e.g., InfoNCE loss). The success of this alignment strategy is attributed to its capability in maximizing the mutual information (MI) between an image and its matched text. However, simply performing cross-modal alignment (CMA) ignores data potential within each modality, which may result in degraded representations. For instance, although CMA-based models are able to map image-text pairs close together in the embedding space, they fail to ensure that similar inputs from the same modality stay close by. This problem can get even worse when the pre-training data is noisy. In this paper, we propose triple contrastive learning (TCL) for vision-language pre-training by leveraging both cross-modal and intra-modal self-supervision. Besides CMA, TCL introduces an intra-modal contrastive objective to provide complementary benefits in representation learning. To take advantage of localized and structural information from image and text input, TCL further maximizes the average MI between local regions of image/text and their global summary. To the best of our knowledge, ours is the first work that takes into account local structure information for multi-modality representation learning. Experimental evaluations show that our approach is competitive and achieves the new state of the art on various common down-stream vision-language tasks such as image-text retrieval and visual question answering.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computer Vision and Pattern Recognition},
  file = {/Users/Nasy/Zotero/storage/XNJBWTWS/Yang et al. - 2022 - Vision-Language Pre-Training with Triple Contrasti.pdf;/Users/Nasy/Zotero/storage/ZS94TDHQ/2202.html}
}

@article{yanHDOCKServerIntegrated2020a,
  title = {The {{HDOCK}} Server for Integrated Protein–Protein Docking},
  author = {Yan, Yumeng and Tao, Huanyu and He, Jiahua and Huang, Sheng-You},
  date = {2020-05},
  journaltitle = {Nat Protoc},
  volume = {15},
  number = {5},
  pages = {1829--1852},
  publisher = {Nature Publishing Group},
  issn = {1750-2799},
  doi = {10.1038/s41596-020-0312-x},
  url = {https://www.nature.com/articles/s41596-020-0312-x},
  abstract = {The HDOCK server (http://hdock.phys.hust.edu.cn/) is a highly integrated suite of homology search, template-based modeling, structure prediction, macromolecular docking, biological information incorporation and job management for robust and fast protein–protein docking. With input information for receptor and ligand molecules (either amino acid sequences or Protein Data Bank structures), the server automatically predicts their interaction through a hybrid algorithm of template-based and template-free docking. The HDOCK server distinguishes itself from similar docking servers in its ability to support amino acid sequences as input and a hybrid docking strategy in which experimental information about the protein–protein binding site and small-angle X-ray scattering can be incorporated during the docking and post-docking processes. Moreover, HDOCK also supports protein–RNA/DNA docking with an intrinsic scoring function. The server delivers both template- and docking-based binding models of two molecules and allows for download and interactive visualization. The HDOCK server is user friendly and has processed {$>$}30,000 docking jobs since its official release in 2017. The server can normally complete a docking job within 30 min.},
  issue = {5},
  langid = {english},
  keywords = {Computational models,Molecular modelling,Protein structure predictions,Software},
  annotation = {255 citations (Crossref) [2022-09-14]},
  file = {/Users/Nasy/Zotero/storage/5G84YE9D/Yan et al. - 2020 - The HDOCK server for integrated protein–protein do.pdf;/Users/Nasy/Zotero/storage/QL788UCP/s41596-020-0312-x.html}
}

@article{yeGeospatialSemanticMapping2021,
  title = {Geospatial and {{Semantic Mapping Platform}} for {{Massive COVID-19 Scientific Publication Search}}},
  author = {Ye, Xinyue and Du, Jiaxin and Gong, Xi and Na, Saiyang and Li, Weimin and Kudva, Sonali},
  date = {2021-01-19},
  journaltitle = {J geovis spat anal},
  volume = {5},
  number = {1},
  pages = {5},
  issn = {2509-8829},
  doi = {10.1007/s41651-021-00073-y},
  url = {https://doi.org/10.1007/s41651-021-00073-y},
  urldate = {2023-08-22},
  abstract = {Thousands of results of scientific publications on the topic of COVID-19 have emerged within a short time, raising significant challenges for scholars to organize the research, or even synthesize the knowledge in a timely or comprehensive manner. To facilitate this, we have developed a geospatial and semantic mapping platform to search and organize these large and unmapped digital collections. The semantic map visualizes research topics based on customized natural language processing algorithms, which helps users to identify their content of specific interest beyond keyword searches in web search engines. The resultant geospatial map extracts all the location names mentioned in the publications, illustrating where COVID-19 studies have been conducted and where neglected study areas might exist. The system has been deployed as a web service for public access.},
  langid = {english},
  keywords = {COVID-19,Geospatial map,Knowledge discovery,Semantic map},
  file = {/Users/Nasy/Zotero/storage/JF4IGMTS/Ye et al. - 2021 - Geospatial and Semantic Mapping Platform for Massive COVID-19 Scientific Publication Search.pdf}
}

@online{yeMPLUGOwlModularizationEmpowers2023,
  title = {{{mPLUG-Owl}}: {{Modularization Empowers Large Language Models}} with {{Multimodality}}},
  shorttitle = {{{mPLUG-Owl}}},
  author = {Ye, Qinghao and Xu, Haiyang and Xu, Guohai and Ye, Jiabo and Yan, Ming and Zhou, Yiyang and Wang, Junyang and Hu, Anwen and Shi, Pengcheng and Shi, Yaya and Li, Chenliang and Xu, Yuanhong and Chen, Hehong and Tian, Junfeng and Qi, Qian and Zhang, Ji and Huang, Fei},
  date = {2023-04-27},
  eprint = {2304.14178},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2304.14178},
  url = {http://arxiv.org/abs/2304.14178},
  urldate = {2023-07-16},
  abstract = {Large language models (LLMs) have demonstrated impressive zero-shot abilities on a variety of open-ended tasks, while recent research has also explored the use of LLMs for multi-modal generation. In this study, we introduce mPLUG-Owl, a novel training paradigm that equips LLMs with multi-modal abilities through modularized learning of foundation LLM, a visual knowledge module, and a visual abstractor module. This approach can support multiple modalities and facilitate diverse unimodal and multimodal abilities through modality collaboration. The training paradigm of mPLUG-Owl involves a two-stage method for aligning image and text, which learns visual knowledge with the assistance of LLM while maintaining and even improving the generation abilities of LLM. In the first stage, the visual knowledge module and abstractor module are trained with a frozen LLM module to align the image and text. In the second stage, language-only and multi-modal supervised datasets are used to jointly fine-tune a low-rank adaption (LoRA) module on LLM and the abstractor module by freezing the visual knowledge module. We carefully build a visually-related instruction evaluation set OwlEval. Experimental results show that our model outperforms existing multi-modal models, demonstrating mPLUG-Owl's impressive instruction and visual understanding ability, multi-turn conversation ability, and knowledge reasoning ability. Besides, we observe some unexpected and exciting abilities such as multi-image correlation and scene text understanding, which makes it possible to leverage it for harder real scenarios, such as vision-only document comprehension. Our code, pre-trained model, instruction-tuned models, and evaluation set are available at https://github.com/X-PLUG/mPLUG-Owl. The online demo is available at https://www.modelscope.cn/studios/damo/mPLUG-Owl.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computation and Language,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/RFQETA4M/Ye et al. - 2023 - mPLUG-Owl Modularization Empowers Large Language .pdf;/Users/Nasy/Zotero/storage/SKBQQNDZ/2304.html}
}

@article{yeOutofsampleNodeRepresentation2019,
  title = {Out-of-Sample {{Node Representation Learning}} for {{Heterogeneous Graph}} in {{Real-time Android Malware Detection}}},
  author = {Ye, Yanfang and Hou, Shifu and Chen, Lingwei and Lei, Jingwei and Wan, Wenqiang and Wang, Jiabin and Xiong, Qi and Shao, Fudong},
  date = {2019},
  pages = {4150--4156},
  url = {https://www.ijcai.org/proceedings/2019/576},
  urldate = {2022-05-26},
  abstract = {Electronic proceedings of IJCAI 2019},
  file = {/Users/Nasy/Zotero/storage/85W5C4BX/576.html}
}

@inproceedings{yeOutofsampleNodeRepresentation2019a,
  title = {Out-of-Sample {{Node Representation Learning}} for {{Heterogeneous Graph}} in {{Real-time Android Malware Detection}}},
  booktitle = {Proceedings of the {{Twenty-Eighth International Joint Conference}} on {{Artificial Intelligence}}},
  author = {Ye, Yanfang and Hou, Shifu and Chen, Lingwei and Lei, Jingwei and Wan, Wenqiang and Wang, Jiabin and Xiong, Qi and Shao, Fudong},
  date = {2019-08},
  pages = {4150--4156},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  location = {Macao, China},
  doi = {10.24963/ijcai.2019/576},
  url = {https://www.ijcai.org/proceedings/2019/576},
  urldate = {2022-05-26},
  abstract = {The increasingly sophisticated Android malware calls for new defensive techniques that are capable of protecting mobile users against novel threats. In this paper, we first extract the runtime Application Programming Interface (API) call sequences from Android apps, and then analyze higher-level semantic relations within the ecosystem to comprehensively characterize the apps. To model different types of entities (i.e., app, API, device, signature, affiliation) and rich relations among them, we present a structured heterogeneous graph (HG) for modeling. To efficiently classify nodes (e.g., apps) in the constructed HG, we propose the HGLearning method to first obtain in-sample node embeddings and then learn representations of out-ofsample nodes without rerunning/adjusting HG embeddings at the first attempt. We later design a deep neural network classifier taking the learned HG representations as inputs for real-time Android malware detection. Comprehensive experiments on large-scale and real sample collections from Tencent Security Lab are performed. Promising results demonstrate that our developed system AiDroid which integrates our proposed method outperforms others in real-time Android malware detection.},
  eventtitle = {Twenty-{{Eighth International Joint Conference}} on {{Artificial Intelligence}} \{\vphantom\}{{IJCAI-19}}\vphantom\{\}},
  isbn = {978-0-9992411-4-1},
  langid = {english},
  annotation = {13 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/2SYPAAG7/Ye et al. - 2019 - Out-of-sample Node Representation Learning for Het.pdf}
}

@inproceedings{yinStrategiesDeployScale2019,
  title = {Strategies to {{Deploy}} and {{Scale Deep Learning}} on the {{Summit Supercomputer}}},
  booktitle = {2019 {{IEEE}}/{{ACM Third Workshop}} on {{Deep Learning}} on {{Supercomputers}} ({{DLS}})},
  author = {Yin, Junqi and Gahlot, Shubhankar and Laanait, Nouamane and Maheshwari, Ketan and Morrison, Jack and Dash, Sajal and Shankar, Mallikarjun},
  date = {2019-11},
  pages = {84--94},
  doi = {10.1109/DLS49591.2019.00016},
  abstract = {The rapid growth and wide applicability of Deep Learning (DL) frameworks poses challenges to computing centers which need to deploy and support the software, and also to domain scientists who have to keep up with the system environment and scale up scientific exploration through DL. We offer recommendations for deploying and scaling DL frameworks on the Summit supercomputer, currently atop the Top500 list, at the Oak Ridge National Laboratory Leadership Computing Facility (OLCF). We discuss DL software deployment in the form of containers, and compare performance of native-built frameworks and containerized deployment. Software containers show no noticeable negative performance impact and exhibit faster Python loading times and promise easier maintenance. To explore strategies for scaling up DL model training campaigns, we assess DL compute kernel performance, discuss and recommend I/O data formats and staging, and identify communication needs for scalable message exchange for DL runs at scale. We recommend that users take a step-wise tuning approach beginning with algorithmic kernel choice, node I/O configuration, and communications tuning as best-practice. We present baseline examples of scaling efficiency 87\% for a DL run of ResNet50 running on 1024 nodes (6144 V100 GPUs).},
  eventtitle = {2019 {{IEEE}}/{{ACM Third Workshop}} on {{Deep Learning}} on {{Supercomputers}} ({{DLS}})},
  keywords = {Containers,Deep learning,Graphics processing units,Libraries,Supercomputers,Tuning},
  annotation = {9 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/UMH65ZZK/8945109.html}
}

@online{yinSurveyMultimodalLarge2023,
  title = {A {{Survey}} on {{Multimodal Large Language Models}}},
  author = {Yin, Shukang and Fu, Chaoyou and Zhao, Sirui and Li, Ke and Sun, Xing and Xu, Tong and Chen, Enhong},
  date = {2023-06-23},
  eprint = {2306.13549},
  eprinttype = {arXiv},
  eprintclass = {cs},
  url = {http://arxiv.org/abs/2306.13549},
  urldate = {2023-07-14},
  abstract = {Multimodal Large Language Model (MLLM) recently has been a new rising research hotspot, which uses powerful Large Language Models (LLMs) as a brain to perform multimodal tasks. The surprising emergent capabilities of MLLM, such as writing stories based on images and OCR-free math reasoning, are rare in traditional methods, suggesting a potential path to artificial general intelligence. In this paper, we aim to trace and summarize the recent progress of MLLM. First of all, we present the formulation of MLLM and delineate its related concepts. Then, we discuss the key techniques and applications, including Multimodal Instruction Tuning (M-IT), Multimodal In-Context Learning (M-ICL), Multimodal Chain of Thought (M-CoT), and LLM-Aided Visual Reasoning (LAVR). Finally, we discuss existing challenges and point out promising research directions. In light of the fact that the era of MLLM has only just begun, we will keep updating this survey and hope it can inspire more research. An associated GitHub link collecting the latest papers is available at https://github.com/BradyFU/Awesome-Multimodal-Large-Language-Models.},
  pubstate = {prepublished},
  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/JPJBBIDD/Yin et al. - 2023 - A Survey on Multimodal Large Language Models.pdf;/Users/Nasy/Zotero/storage/4ENVDMLC/2306.html}
}

@article{youUsingManifoldEmbedding2010,
  title = {Using Manifold Embedding for Assessing and Predicting Protein Interactions from High-Throughput Experimental Data},
  author = {You, Zhu-Hong and Lei, Ying-Ke and Gui, Jie and Huang, De-Shuang and Zhou, Xiaobo},
  date = {2010-11-01},
  journaltitle = {Bioinformatics},
  volume = {26},
  number = {21},
  pages = {2744--2751},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btq510},
  url = {https://doi.org/10.1093/bioinformatics/btq510},
  urldate = {2023-08-14},
  abstract = {Motivation: High-throughput protein interaction data, with ever-increasing volume, are becoming the foundation of many biological discoveries, and thus high-quality protein–protein interaction (PPI) maps are critical for a deeper understanding of cellular processes. However, the unreliability and paucity of current available PPI data are key obstacles to the subsequent quantitative studies. It is therefore highly desirable to develop an approach to deal with these issues from the computational perspective. Most previous works for assessing and predicting protein interactions either need supporting evidences from multiple information resources or are severely impacted by the sparseness of PPI networks.Results: We developed a robust manifold embedding technique for assessing the reliability of interactions and predicting new interactions, which purely utilizes the topological information of PPI networks and can work on a sparse input protein interactome without requiring additional information types. After transforming a given PPI network into a low-dimensional metric space using manifold embedding based on isometric feature mapping (ISOMAP), the problem of assessing and predicting protein interactions is recasted into the form of measuring similarity between points of its metric space. Then a reliability index, a likelihood indicating the interaction of two proteins, is assigned to each protein pair in the PPI networks based on the similarity between the points in the embedded space. Validation of the proposed method is performed with extensive experiments on densely connected and sparse PPI network of yeast, respectively. Results demonstrate that the interactions ranked top by our method have high-functional homogeneity and localization coherence, especially our method is very efficient for large sparse PPI network with which the traditional algorithms fail. Therefore, the proposed algorithm is a much more promising method to detect both false positive and false negative interactions in PPI networks.Availability: MATLAB code implementing the algorithm is available from the web site http://home.ustc.edu.cn/∼yzh33108/Manifold.htm.Contact: ~dshuang@iim.ac.cnSupplementary information: ~Supplementary data are available at Bioinformatics online.},
  file = {/Users/Nasy/Zotero/storage/PB8S86YG/You et al. - 2010 - Using manifold embedding for assessing and predicting protein interactions from high-throughput expe.pdf;/Users/Nasy/Zotero/storage/6ZC72MZ6/213668.html}
}

@article{yTEINetDeepLearning2023,
  title = {{{TEINet}}: A Deep Learning Framework for Prediction of {{TCR-epitope}} Binding Specificity},
  shorttitle = {{{TEINet}}},
  author = {Y, Jiang and M, Huo and S, Cheng Li},
  date = {2023-03-19},
  journaltitle = {Briefings in bioinformatics},
  volume = {24},
  number = {2},
  eprint = {36907658},
  eprinttype = {pmid},
  publisher = {Brief Bioinform},
  issn = {1477-4054},
  doi = {10.1093/bib/bbad086},
  url = {https://pubmed.ncbi.nlm.nih.gov/36907658/},
  urldate = {2024-10-07},
  abstract = {The adaptive immune response to foreign antigens is initiated by T-cell receptor (TCR) recognition on the antigens. Recent experimental advances have enabled the generation of a large amount of TCR data and their cognate antigenic targets, allowing machine learning models to predict the binding spec …},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/6MHTVSYW/Y et al. - 2023 - TEINet a deep learning framework for prediction of TCR-epitope binding specificity.pdf;/Users/Nasy/Zotero/storage/BSINQ86Y/36907658.html}
}

@misc{yuCollaborativeFilteringEntity,
  title = {Collaborative {{Filtering}} with {{Entity Similarity Regularization}} in {{Heterogeneous Information Networks}}},
  author = {Yu, Xiao and Ren, Xiang and Gu, Quanquan and Sun, Yizhou and Han, Jiawei},
  abstract = {Researchers have been studying hybrid recommender systems which combine user-item rating data with external information in recent years. Some studies suggest that by leveraging additional user and / or item relations, e.g., social network, the performance of the recommendation models can be improved. These studies, nevertheless, mostly utilize a single type of external relationship. Considering the heterogeneity of real-world applications, we propose to position the well-studied recommendation problem in a heterogeneous information network context and attempt to incorporate different recommendation factors. We discuss how heterogeneous information network can benefit recommender systems and then propose a matrix factorization based unified recommendation model to take advantage of both rating data and the related information network. Empirical studies show that our approach outperforms several state-of-the-art recommendation methods on explicit rating data. 1},
  file = {/Users/Nasy/Zotero/storage/Z4MFWHVU/Yu et al. - Collaborative Filtering with Entity Similarity Reg.pdf;/Users/Nasy/Zotero/storage/SVHSYW8K/summary.html}
}

@inproceedings{yuHeterogeneousGraphLearning2019,
  title = {Heterogeneous {{Graph Learning}} for {{Visual Commonsense Reasoning}}},
  booktitle = {Advances in {{Neural Information Processing Systems}}},
  author = {Yu, Weijiang and Zhou, Jingwen and Yu, Weihao and Liang, Xiaodan and Xiao, Nong},
  date = {2019},
  volume = {32},
  publisher = {Curran Associates, Inc.},
  url = {https://proceedings.neurips.cc/paper/2019/hash/8f19793b2671094e63a15ab883d50137-Abstract.html},
  urldate = {2022-05-26},
  abstract = {Visual commonsense reasoning task aims at leading the research field into solving cognition-level reasoning with the ability to predict correct answers and meanwhile providing convincing reasoning paths, resulting in three sub-tasks i.e., Q-{$>$}A, QA-{$>$}R and Q-{$>$}AR. It poses great challenges over the proper semantic alignment between vision and linguistic domains and knowledge reasoning to generate persuasive reasoning paths. Existing works either resort to a powerful end-to-end network that cannot produce interpretable reasoning paths or solely explore intra-relationship of visual objects (homogeneous graph) while ignoring the cross-domain semantic alignment among visual concepts and linguistic words. In this paper, we propose a new Heterogeneous Graph Learning (HGL) framework for seamlessly integrating the intra-graph and inter-graph reasoning in order to bridge the vision and language domain. Our HGL consists of a primal vision-to-answer heterogeneous graph (VAHG) module and a dual question-to-answer heterogeneous graph (QAHG) module to interactively refine reasoning paths for semantic agreement. Moreover, our HGL integrates a contextual voting module to exploit a long-range visual context for better global reasoning. Experiments on the large-scale Visual Commonsense Reasoning benchmark demonstrate the superior performance of our proposed modules on three tasks (improving 5\% accuracy on Q-{$>$}A, 3.5\% on QA-{$>$}R, 5.8\% on Q-{$>$}AR).},
  file = {/Users/Nasy/Zotero/storage/WI67GGNE/Yu et al. - 2019 - Heterogeneous Graph Learning for Visual Commonsens.pdf}
}

@article{yuMolecularGraphRepresentation2021,
  title = {Molecular {{Graph Representation Learning}} via {{Heterogeneous Motif Graph Construction}}},
  author = {Yu, Zhaoning and Gao, Hongyang},
  date = {2021-09-29},
  url = {https://openreview.net/forum?id=8gX3bY78aCb},
  urldate = {2022-05-26},
  abstract = {We consider feature representation learning of molecular graphs. Graph Neural Networks have been widely used in feature representation learning of molecular graphs. However, most proposed methods...},
  langid = {english},
  keywords = {Graph Neural Networks,Heterogeneous,Molecular Graph Representation},
  file = {/Users/Nasy/Zotero/storage/WTIZJEIS/Yu and Gao - 2021 - Molecular Graph Representation Learning via Hetero.pdf;/Users/Nasy/Zotero/storage/4FJUFZWV/forum.html}
}

@online{yunCutMixRegularizationStrategy2019,
  title = {{{CutMix}}: {{Regularization Strategy}} to {{Train Strong Classifiers}} with {{Localizable Features}}},
  shorttitle = {{{CutMix}}},
  author = {Yun, Sangdoo and Han, Dongyoon and Oh, Seong Joon and Chun, Sanghyuk and Choe, Junsuk and Yoo, Youngjoon},
  date = {2019-08-07},
  eprint = {1905.04899},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.1905.04899},
  url = {http://arxiv.org/abs/1905.04899},
  urldate = {2024-10-31},
  abstract = {Regional dropout strategies have been proposed to enhance the performance of convolutional neural network classifiers. They have proved to be effective for guiding the model to attend on less discriminative parts of objects (e.g. leg as opposed to head of a person), thereby letting the network generalize better and have better object localization capabilities. On the other hand, current methods for regional dropout remove informative pixels on training images by overlaying a patch of either black pixels or random noise. Such removal is not desirable because it leads to information loss and inefficiency during training. We therefore propose the CutMix augmentation strategy: patches are cut and pasted among training images where the ground truth labels are also mixed proportionally to the area of the patches. By making efficient use of training pixels and retaining the regularization effect of regional dropout, CutMix consistently outperforms the state-of-the-art augmentation strategies on CIFAR and ImageNet classification tasks, as well as on the ImageNet weakly-supervised localization task. Moreover, unlike previous augmentation methods, our CutMix-trained ImageNet classifier, when used as a pretrained model, results in consistent performance gains in Pascal detection and MS-COCO image captioning benchmarks. We also show that CutMix improves the model robustness against input corruptions and its out-of-distribution detection performances. Source code and pretrained models are available at https://github.com/clovaai/CutMix-PyTorch .},
  pubstate = {prepublished},
  keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/TMPFZRK4/Yun et al. - 2019 - CutMix Regularization Strategy to Train Strong Classifiers with Localizable Features.pdf;/Users/Nasy/Zotero/storage/XIL9V6ME/1905.html}
}

@article{zengProteinProteinInteraction2020,
  title = {Protein–Protein Interaction Site Prediction through Combining Local and Global Features with Deep Neural Networks},
  author = {Zeng, Min and Zhang, Fuhao and Wu, Fang-Xiang and Li, Yaohang and Wang, Jianxin and Li, Min},
  date = {2020-02-15},
  journaltitle = {Bioinformatics},
  volume = {36},
  number = {4},
  pages = {1114--1120},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btz699},
  url = {https://doi.org/10.1093/bioinformatics/btz699},
  urldate = {2022-08-19},
  abstract = {Protein–protein interactions (PPIs) play important roles in many biological processes. Conventional biological experiments for identifying PPI sites are costly and time-consuming. Thus, many computational approaches have been proposed to predict PPI sites. Existing computational methods usually use local contextual features to predict PPI sites. Actually, global features of protein sequences are critical for PPI site prediction.A new end-to-end deep learning framework, named DeepPPISP, through combining local contextual and global sequence features, is proposed for PPI site prediction. For local contextual features, we use a sliding window to capture features of neighbors of a target amino acid as in previous studies. For global sequence features, a text convolutional neural network is applied to extract features from the whole protein sequence. Then the local contextual and global sequence features are combined to predict PPI sites. By integrating local contextual and global sequence features, DeepPPISP achieves the state-of-the-art performance, which is better than the other competing methods. In order to investigate if global sequence features are helpful in our deep learning model, we remove or change some components in DeepPPISP. Detailed analyses show that global sequence features play important roles in DeepPPISP.The DeepPPISP web server is available at http://bioinformatics.csu.edu.cn/PPISP/. The source code can be obtained from https://github.com/CSUBioGroup/DeepPPISP.Supplementary data are available at Bioinformatics online.},
  annotation = {34 citations (Crossref) [2022-08-19]},
  file = {/Users/Nasy/Zotero/storage/D7N3TF9Y/Zeng et al. - 2020 - Protein–protein interaction site prediction throug.pdf;/Users/Nasy/Zotero/storage/75D6NQY8/5564115.html}
}

@article{zengProteinproteinInteractionSite2020,
  title = {Protein-Protein Interaction Site Prediction through Combining Local and Global Features with Deep Neural Networks},
  author = {Zeng, Min and Zhang, Fuhao and Wu, Fang-Xiang and Li, Yaohang and Wang, Jianxin and Li, Min},
  date = {2020-02-15},
  journaltitle = {Bioinformatics},
  volume = {36},
  number = {4},
  eprint = {31593229},
  eprinttype = {pmid},
  pages = {1114--1120},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btz699},
  abstract = {MOTIVATION: Protein-protein interactions (PPIs) play important roles in many biological processes. Conventional biological experiments for identifying PPI sites are costly and time-consuming. Thus, many computational approaches have been proposed to predict PPI sites. Existing computational methods usually use local contextual features to predict PPI sites. Actually, global features of protein sequences are critical for PPI site prediction. RESULTS: A new end-to-end deep learning framework, named DeepPPISP, through combining local contextual and global sequence features, is proposed for PPI site prediction. For local contextual features, we use a sliding window to capture features of neighbors of a target amino acid as in previous studies. For global sequence features, a text convolutional neural network is applied to extract features from the whole protein sequence. Then the local contextual and global sequence features are combined to predict PPI sites. By integrating local contextual and global sequence features, DeepPPISP achieves the state-of-the-art performance, which is better than the other competing methods. In order to investigate if global sequence features are helpful in our deep learning model, we remove or change some components in DeepPPISP. Detailed analyses show that global sequence features play important roles in DeepPPISP. AVAILABILITY AND IMPLEMENTATION: The DeepPPISP web server is available at http://bioinformatics.csu.edu.cn/PPISP/. The source code can be obtained from https://github.com/CSUBioGroup/DeepPPISP. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
  langid = {english},
  keywords = {Amino Acid Sequence,Neural Networks Computer,Protein Interaction Domains and Motifs,Proteins,Software},
  file = {/Users/Nasy/Zotero/storage/DSH38EC5/Zeng et al. - 2020 - Protein-protein interaction site prediction through combining local and global features with deep ne.pdf}
}

@article{zhangBiobjectiveWorkflowScheduling2017,
  title = {Bi-Objective Workflow Scheduling of the Energy Consumption and Reliability in Heterogeneous Computing Systems},
  author = {Zhang, Longxin and Li, Kenli and Li, Changyun and Li, Keqin},
  date = {2017},
  journaltitle = {Information Sciences},
  volume = {379},
  pages = {241--256},
  issn = {0020-0255},
  doi = {10.1016/j.ins.2016.08.003},
  url = {https://www.sciencedirect.com/science/article/pii/S0020025516305722},
  abstract = {Recent studies focus primarily on low energy consumption or execution time for task scheduling with precedence constraints in heterogeneous computing systems. In most cases, system reliability is more important than other performance metrics. In addition, energy consumption and system reliability are two conflicting objectives. A novel bi-objective genetic algorithm (BOGA) to pursue low energy consumption and high system reliability for workflow scheduling is presented in this paper. The proposed BOGA offers users more flexibility when jobs are submitted to a data center. On the basis of real-world and randomly generated application graphs, numerous experiments are conducted to evaluate the performance of the proposed algorithm. In comparison with excellent algorithms such as multi-objective heterogeneous earliest finish time (MOHEFT) and multi-objective differential evolution (MODE), BOGA performs significantly better in terms of finding the spread of compromise solutions.},
  keywords = {Multi-objective,nosource,Precedence constraints,Reliability,Scheduling,Workflow},
  annotation = {98 citations (Crossref) [2022-08-03]}
}

@article{zhangDetectingGravitationalWaves2022,
  title = {Detecting Gravitational Waves from Extreme Mass Ratio Inspirals Using Convolutional Neural Networks},
  author = {Zhang, Xue-Ting and Messenger, Chris and Korsakova, Natalia and Chan, Man Leong and Hu, Yi-Ming and Zhang, Jian-dong},
  date = {2022-06-24},
  journaltitle = {Phys. Rev. D},
  volume = {105},
  number = {12},
  pages = {123027},
  publisher = {American Physical Society},
  doi = {10.1103/PhysRevD.105.123027},
  url = {https://link.aps.org/doi/10.1103/PhysRevD.105.123027},
  urldate = {2022-07-29},
  abstract = {Extreme mass ratio inspirals (EMRIs) are among the most interesting gravitational wave (GW) sources for space-borne GW detectors. However, successful GW data analysis remains challenging due to many issues, ranging from the difficulty of modeling accurate waveforms, to the impractically large template bank required by the traditional matched filtering search method. In this work, we introduce a proof-of-principle approach for EMRI detection based on convolutional neural networks (CNNs). We demonstrate the performance with simulated EMRI signals buried in Gaussian noise. We show that over a wide range of physical parameters, the network is effective for EMRI systems with a signal-to-noise ratio larger than 50, and the performance is most strongly related to the signal-to-noise ratio. The method also shows good generalization ability toward different waveform models. Our study reveals the potential applicability of machine learning technology like CNNs toward more realistic EMRI data analysis.},
  annotation = {0 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/NUGPRAKL/Zhang et al. - 2022 - Detecting gravitational waves from extreme mass ra.pdf;/Users/Nasy/Zotero/storage/XYRVWQY4/PhysRevD.105.html}
}

@unpublished{zhangDetectingGravitationalwavesExtreme2022,
  title = {Detecting {{Gravitational-waves}} from {{Extreme Mass Ratio Inspirals}} Using {{Convolutional Neural Networks}}},
  author = {Zhang, Xue-Ting and Messenger, Chris and Korsakova, Natalia and Chan, Man Leong and Hu, Yi-Ming and Zhang, Jing-dong},
  date = {2022-02-14},
  eprint = {2202.07158},
  eprinttype = {arXiv},
  eprintclass = {astro-ph, physics:gr-qc},
  url = {http://arxiv.org/abs/2202.07158},
  urldate = {2022-02-16},
  abstract = {Extreme mass ratio inspirals (EMRIs) are among the most interesting gravitational wave (GW) sources for space-borne GW detectors. However, successful GW data analysis remains challenging due to many issues, ranging from the difficulty of modeling accurate waveforms, to the impractically large template bank required by the traditional matched filtering search method. In this work, we introduce a proof-of-principle approach for EMRI detection based on convolutional neural networks (CNNs). We demonstrate the performance with simulated EMRI signals buried in Gaussian noise. We show that over a wide range of physical parameters, the network is effective for EMRI systems with a signal-to-noise ratio larger than 50, and the performance is most strongly related to the signal-to-noise ratio. The method also shows good generalization ability towards different waveform models. Our study reveals the potential applicability of machine learning technology like CNNs towards more realistic EMRI data analysis.},
  keywords = {Astrophysics - High Energy Astrophysical Phenomena,General Relativity and Quantum Cosmology},
  file = {/Users/Nasy/Zotero/storage/ZWV428QE/Zhang et al. - 2022 - Detecting Gravitational-waves from Extreme Mass Ra.pdf;/Users/Nasy/Zotero/storage/MZ7TA345/2202.html}
}

@inproceedings{zhangHeterogeneousGraphNeural2019,
  title = {Heterogeneous {{Graph Neural Network}}},
  booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Zhang, Chuxu and Song, Dongjin and Huang, Chao and Swami, Ananthram and Chawla, Nitesh V.},
  date = {2019-07-25},
  series = {{{KDD}} '19},
  pages = {793--803},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3292500.3330961},
  url = {https://doi.org/10.1145/3292500.3330961},
  abstract = {Representation learning in heterogeneous graphs aims to pursue a meaningful vector representation for each node so as to facilitate downstream applications such as link prediction, personalized recommendation, node classification, etc. This task, however, is challenging not only because of the demand to incorporate heterogeneous structural (graph) information consisting of multiple types of nodes and edges, but also due to the need for considering heterogeneous attributes or contents (e.g., text or image) associated with each node. Despite a substantial amount of effort has been made to homogeneous (or heterogeneous) graph embedding, attributed graph embedding as well as graph neural networks, few of them can jointly consider heterogeneous structural (graph) information as well as heterogeneous contents information of each node effectively. In this paper, we propose HetGNN, a heterogeneous graph neural network model, to resolve this issue. Specifically, we first introduce a random walk with restart strategy to sample a fixed size of strongly correlated heterogeneous neighbors for each node and group them based upon node types. Next, we design a neural network architecture with two modules to aggregate feature information of those sampled neighboring nodes. The first module encodes "deep" feature interactions of heterogeneous contents and generates content embedding for each node. The second module aggregates content (attribute) embeddings of different neighboring groups (types) and further combines them by considering the impacts of different groups to obtain the ultimate node embedding. Finally, we leverage a graph context loss and a mini-batch gradient descent procedure to train the model in an end-to-end manner. Extensive experiments on several datasets demonstrate that HetGNN can outperform state-of-the-art baselines in various graph mining tasks, i.e., link prediction, recommendation, node classification \& clustering and inductive node classification \& clustering.},
  isbn = {978-1-4503-6201-6},
  keywords = {graph embedding,graph neural networks,heterogeneous graphs},
  annotation = {214 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/CCBQNB5Q/Zhang et al. - 2019 - Heterogeneous Graph Neural Network.pdf}
}

@article{zhangHighthroughputDeterminationAntigen2018,
  title = {High-Throughput Determination of the Antigen Specificities of {{T}} Cell Receptors in Single Cells},
  author = {Zhang, Shu-Qi and Ma, Ke-Yue and Schonnesen, Alexandra A. and Zhang, Mingliang and He, Chenfeng and Sun, Eric and Williams, Chad M. and Jia, Weiping and Jiang, Ning},
  date = {2018-12},
  journaltitle = {Nat Biotechnol},
  volume = {36},
  number = {12},
  pages = {1156--1159},
  publisher = {Nature Publishing Group},
  issn = {1546-1696},
  doi = {10.1038/nbt.4282},
  url = {https://www.nature.com/articles/nbt.4282},
  urldate = {2022-03-19},
  abstract = {The antigen specificity of T cell receptors is measured at high throughput in single cells.},
  issue = {12},
  langid = {english},
  keywords = {Applied immunology,Immunotherapy,Molecular engineering,Next-generation sequencing,T-cell receptor},
  annotation = {84 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/TCR8X2YC/Zhang et al. - 2018 - High-throughput determination of the antigen speci.pdf;/Users/Nasy/Zotero/storage/98Q6RAQ5/nbt.html}
}

@article{zhangHyperbolicGraphAttention2022,
  title = {Hyperbolic {{Graph Attention Network}}},
  author = {Zhang, Yiding and Wang, Xiao and Shi, Chuan and Jiang, Xunqiang and Ye, Yanfang},
  date = {2022-12},
  journaltitle = {IEEE Transactions on Big Data},
  volume = {8},
  number = {6},
  pages = {1690--1701},
  issn = {2332-7790},
  doi = {10.1109/TBDATA.2021.3081431},
  abstract = {Graph neural network (GNN) has shown superior performance in dealing with structured graphs, which has attracted considerable research attention recently. Most of the existing GNNs are designed in euclidean spaces; however, real-world spatial structured data can be non-euclidean surfaces (e.g., hyperbolic spaces). For example, biologists may inspect the geometric shape of a protein surface to determine its interaction with other biomolecules for drug discovery. Although there is growing research on generalizing GNNs to non-euclidean surfaces, the works in these fields are still scarce. In this article, we exploit the graph attention network to learn robust node representations of graphs in hyperbolic spaces. As the gyrovector space framework provides an elegant algebraic formalism for hyperbolic geometry, we utilize this framework to learn the graph representations in hyperbolic spaces. Specifically, we first use the operations defined in the framework to transform the features in a graph; and we exploit the proximity in the product of hyperbolic spaces to model the multi-head attention mechanism in the non-Euclidean setting; afterward, we further devise a parallel strategy using logarithmic and exponential maps to improve the efficiency of our proposed model. The comprehensive experimental results demonstrate the effectiveness of the proposed model, compared with state-of-the-art methods.},
  eventtitle = {{{IEEE Transactions}} on {{Big Data}}},
  keywords = {Biological system modeling,Convolution,Data models,Deep learning,Geometry,graph neural network,Graph neural networks,hyperbolic space,Recommender systems,representation learning,Social networking (online)},
  annotation = {12 citations (Crossref) [2023-02-02]},
  file = {/Users/Nasy/Zotero/storage/24X3MWAC/Zhang et al. - 2022 - Hyperbolic Graph Attention Network.pdf;/Users/Nasy/Zotero/storage/Z67IH4NQ/9435103.html}
}

@article{zhangMappingFunctionalLandscape2021,
  title = {Mapping the Functional Landscape of {{T}} Cell Receptor Repertoires by Single-{{T}} Cell Transcriptomics},
  author = {Zhang, Ze and Xiong, Danyi and Wang, Xinlei and Liu, Hongyu and Wang, Tao},
  date = {2021-01},
  journaltitle = {Nat Methods},
  volume = {18},
  number = {1},
  pages = {92--99},
  publisher = {Nature Publishing Group},
  issn = {1548-7105},
  doi = {10.1038/s41592-020-01020-3},
  url = {https://www.nature.com/articles/s41592-020-01020-3},
  urldate = {2022-05-12},
  abstract = {Many experimental and bioinformatics approaches have been developed to characterize the human T cell receptor (TCR) repertoire. However, the unknown functional relevance of TCR profiling hinders unbiased interpretation of the biology of T cells. To address this inadequacy, we developed tessa, a tool to integrate TCRs with gene expression of T cells to estimate the effect that TCRs confer on the phenotypes of T cells. Tessa leveraged techniques combining single-cell RNA-sequencing with TCR sequencing. We validated tessa and showed its superiority over existing approaches that investigate only the TCR sequences. With tessa, we demonstrated that TCR similarity constrains the phenotypes of T cells to be similar and dictates a gradient in antigen targeting efficiency of T cell clonotypes with convergent TCRs. We showed this constraint could predict a functional dichotomization of T cells postimmunotherapy treatment and is weakened in tumor contexts.},
  issue = {1},
  langid = {english},
  keywords = {Adaptive immunity,Functional clustering,Lymphocytes,Software},
  annotation = {17 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/M2YYIAG2/Zhang et al. - 2021 - Mapping the functional landscape of T cell recepto.pdf}
}

@online{zhangMixupEmpiricalRisk2018,
  title = {Mixup: {{Beyond Empirical Risk Minimization}}},
  shorttitle = {Mixup},
  author = {Zhang, Hongyi and Cisse, Moustapha and Dauphin, Yann N. and Lopez-Paz, David},
  date = {2018-04-27},
  eprint = {1710.09412},
  eprinttype = {arXiv},
  doi = {10.48550/arXiv.1710.09412},
  url = {http://arxiv.org/abs/1710.09412},
  urldate = {2024-10-31},
  abstract = {Large deep neural networks are powerful, but exhibit undesirable behaviors such as memorization and sensitivity to adversarial examples. In this work, we propose mixup, a simple learning principle to alleviate these issues. In essence, mixup trains a neural network on convex combinations of pairs of examples and their labels. By doing so, mixup regularizes the neural network to favor simple linear behavior in-between training examples. Our experiments on the ImageNet-2012, CIFAR-10, CIFAR-100, Google commands and UCI datasets show that mixup improves the generalization of state-of-the-art neural network architectures. We also find that mixup reduces the memorization of corrupt labels, increases the robustness to adversarial examples, and stabilizes the training of generative adversarial networks.},
  pubstate = {prepublished},
  keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
  file = {/Users/Nasy/Zotero/storage/PVI8NEJA/Zhang et al. - 2018 - mixup Beyond Empirical Risk Minimization.pdf;/Users/Nasy/Zotero/storage/BIVHCEX5/1710.html}
}

@inproceedings{zhangPersonReIdentificationUsing2021,
  title = {Person {{Re-Identification Using Heterogeneous Local Graph Attention Networks}}},
  author = {Zhang, Zhong and Zhang, Haijia and Liu, Shuang},
  date = {2021},
  pages = {12136--12145},
  url = {https://openaccess.thecvf.com/content/CVPR2021/html/Zhang_Person_Re-Identification_Using_Heterogeneous_Local_Graph_Attention_Networks_CVPR_2021_paper.html},
  urldate = {2022-05-26},
  eventtitle = {Proceedings of the {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/Z5XQTMFW/Zhang et al. - 2021 - Person Re-Identification Using Heterogeneous Local.pdf;/Users/Nasy/Zotero/storage/4GYBHQEI/Zhang_Person_Re-Identification_Using_Heterogeneous_Local_Graph_Attention_Networks_CVPR_2021_pap.html}
}

@article{zhangPIRDPanImmune2020,
  title = {{{PIRD}}: {{Pan Immune Repertoire Database}}},
  shorttitle = {{{PIRD}}},
  author = {Zhang, Wei and Wang, Longlong and Liu, Ke and Wei, Xiaofeng and Yang, Kai and Du, Wensi and Wang, Shiyu and Guo, Nannan and Ma, Chuanchuan and Luo, Lihua and Wu, Jinghua and Lin, Liya and Yang, Fan and Gao, Fei and Wang, Xie and Li, Tao and Zhang, Ruifang and Saksena, Nitin K. and Yang, Huanming and Wang, Jian and Fang, Lin and Hou, Yong and Xu, Xun and Liu, Xiao},
  date = {2020-02-01},
  journaltitle = {Bioinformatics},
  volume = {36},
  number = {3},
  eprint = {31373607},
  eprinttype = {pmid},
  pages = {897--903},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btz614},
  abstract = {MOTIVATION: T and B cell receptors (TCRs and BCRs) play a pivotal role in the adaptive immune system by recognizing an enormous variety of external and internal antigens. Understanding these receptors is critical for exploring the process of immunoreaction and exploiting potential applications in immunotherapy and antibody drug design. Although a large number of samples have had their TCR and BCR repertoires sequenced using high-throughput sequencing in recent years, very few databases have been constructed to store these kinds of data. To resolve this issue, we developed a database. RESULTS: We developed a database, the Pan Immune Repertoire Database (PIRD), located in China National GeneBank (CNGBdb), to collect and store annotated TCR and BCR sequencing data, including from Homo sapiens and other species. In addition to data storage, PIRD also provides functions of data visualization and interactive online analysis. Additionally, a manually curated database of TCRs and BCRs targeting known antigens (TBAdb) was also deposited in PIRD. AVAILABILITY AND IMPLEMENTATION: PIRD can be freely accessed at https://db.cngb.org/pird.},
  langid = {english},
  keywords = {Antigens,Databases Factual,High-Throughput Nucleotide Sequencing,Humans,Immunotherapy,Receptors Antigen T-Cell},
  annotation = {14 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/9EQMVC5T/Zhang et al. - 2020 - PIRD Pan Immune Repertoire Database.pdf}
}

@article{zhaoHetNERecHeterogeneousNetwork2020,
  title = {{{HetNERec}}: {{Heterogeneous}} Network Embedding Based Recommendation},
  shorttitle = {{{HetNERec}}},
  author = {Zhao, Zhongying and Zhang, Xuejian and Zhou, Hui and Li, Chao and Gong, Maoguo and Wang, Yongqing},
  date = {2020-09-27},
  journaltitle = {Knowledge-Based Systems},
  volume = {204},
  pages = {106218},
  issn = {0950-7051},
  doi = {10.1016/j.knosys.2020.106218},
  url = {https://www.sciencedirect.com/science/article/pii/S0950705120304317},
  urldate = {2022-06-05},
  abstract = {Traditional recommendation techniques are hindered by the simplicity and sparsity of user-item interaction data and can be improved by introducing auxiliary information related to users and/or items. However, most studies have focused on a single typed external relationship and not fully utilized the latent relationships among users and items. In this paper, we propose a heterogeneous network embedding-based recommendation method called HetNERec. Specifically, we first construct the co-occurrence networks by extracting multiple co-occurrence relationships from a recommendation-oriented heterogeneous network. We then propose an integration function to integrate multiple network embedded representations into a single representation to enhance the recommendation performance. Finally, the matrix factorization is extended by integrating the embedded representations and considering the latent relationships among users and items. The experimental results on real-world datasets demonstrate that the proposed HetNERec outperforms several state-of-the-art recommendation methods.},
  langid = {english},
  keywords = {Heterogeneous network,Heterogeneous network embedding,Network embedding,Recommender system},
  annotation = {33 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/UBIK6PII/Zhao et al. - 2020 - HetNERec Heterogeneous network embedding based re.pdf;/Users/Nasy/Zotero/storage/4J9J574N/S0950705120304317.html}
}

@inproceedings{zhaoIntentGCScalableGraph2019,
  title = {{{IntentGC}}: {{A Scalable Graph Convolution Framework Fusing Heterogeneous Information}} for {{Recommendation}}},
  shorttitle = {{{IntentGC}}},
  booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Zhao, Jun and Zhou, Zhou and Guan, Ziyu and Zhao, Wei and Ning, Wei and Qiu, Guang and He, Xiaofei},
  date = {2019-07-25},
  series = {{{KDD}} '19},
  pages = {2347--2357},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3292500.3330686},
  url = {https://doi.org/10.1145/3292500.3330686},
  urldate = {2022-06-05},
  abstract = {The remarkable progress of network embedding has led to state-of-the-art algorithms in recommendation. However, the sparsity of user-item interactions (i.e., explicit preferences) on websites remains a big challenge for predicting users' behaviors. Although research efforts have been made in utilizing some auxiliary information (e.g., social relations between users) to solve the problem, the existing rich heterogeneous auxiliary relationships are still not fully exploited. Moreover, previous works relied on linearly combined regularizers and suffered parameter tuning. In this work, we collect abundant relationships from common user behaviors and item information, and propose a novel framework named IntentGC to leverage both explicit preferences and heterogeneous relationships by graph convolutional networks. In addition to the capability of modeling heterogeneity, IntentGC can learn the importance of different relationships automatically by the neural model in a nonlinear sense. To apply IntentGC to web-scale applications, we design a faster graph convolutional model named IntentNet by avoiding unnecessary feature interactions. Empirical experiments on two large-scale real-world datasets and online A/B tests in Alibaba demonstrate the superiority of our method over state-of-the-art algorithms. We also release the source code of our work at https://github.com/peter14121/intentgc-models.},
  isbn = {978-1-4503-6201-6},
  keywords = {graph convolutional networks,heterogeneous information network,recommendation},
  annotation = {41 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/M6QZIYJX/Zhao et al. - 2019 - IntentGC A Scalable Graph Convolution Framework F.pdf}
}

@article{zhaoOptimizingConvolutionalNeural2018,
  title = {Optimizing {{Convolutional Neural Networks}} on the {{Sunway TaihuLight Supercomputer}}},
  author = {Zhao, Wenlai and Fu, Haohuan and Fang, Jiarui and Zheng, Weijie and Gan, Lin and Yang, Guangwen},
  date = {2018-03-22},
  journaltitle = {ACM Trans. Archit. Code Optim.},
  volume = {15},
  number = {1},
  pages = {13:1--13:26},
  issn = {1544-3566},
  doi = {10.1145/3177885},
  url = {https://doi.org/10.1145/3177885},
  urldate = {2022-03-13},
  abstract = {The Sunway TaihuLight supercomputer is powered by SW26010, a new 260-core processor designed with on-chip fusion of heterogeneous cores. In this article, we present our work on optimizing the training process of convolutional neural networks (CNNs) on the Sunway TaihuLight supercomputer. Specifically, a highly efficient library (swDNN) and a customized Caffe framework (swCaffe) are proposed. Architecture-oriented optimization methods targeting the many-core architecture of SW26010 are introduced and are able to achieve 48× speedup for the convolution routine in swDNN and 4× speedup for the complete training process of the VGG-16 network using swCaffe, compared to the unoptimized algorithm and framework. Compared to the cuDNN library and the Caffe framework based on the NVIDIA K40m GPU, the proposed swDNN library and swCaffe framework on SW26010 have nearly half the performance of K40m in single -precision and have 3.6× and 1.8× speedup over K40m in double precision, respectively.},
  keywords = {Convolutional neural network,deep learning,heterogeneous many-core architecture,Sunway TaihuLight supercomputer},
  annotation = {11 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/ANSKJGQE/Zhao et al. - 2018 - Optimizing Convolutional Neural Networks on the Su.pdf}
}

@article{zhaoSpacebasedGravitationalWave2022,
  title = {Space-Based Gravitational Wave Signal Detection and Extraction with Deep Neural Network},
  author = {Zhao, Tianyu and Lyu, Ruoxi and Ren, Zhixiang and Wang, He and Cao, Zhoujian},
  date = {2022-07-15},
  doi = {10.48550/arXiv.2207.07414},
  url = {https://arxiv.org/abs/2207.07414v2},
  urldate = {2022-07-29},
  abstract = {Space-based gravitational wave (GW) detectors will be able to observe signals from sources that are otherwise nearly impossible from current ground-based detection. Consequently, the well established signal detection method, matched filtering, will require a complex template bank, leading to a computational cost that is too expensive in practice. Here, we develop a high-accuracy GW signal detection and extraction method for all space-based GW sources. As a proof of concept, we show that a science-driven and uniform multi-stage deep neural network can identify synthetic signals that are submerged in Gaussian noise. Our method has more than 99\% accuracy for signal detection of various sources while obtaining at least 95\% similarity compared with target signals. We further demonstrate the interpretability and strong generalization behavior for several extended scenarios.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/3SGZ4BMD/2207.html}
}

@article{zhaoSpacebasedGravitationalWave2022a,
  title = {Space-Based Gravitational Wave Signal Detection and Extraction with Deep Neural Network},
  author = {Zhao, Tianyu and Lyu, Ruoxi and Ren, Zhixiang and Wang, He and Cao, Zhoujian},
  date = {2022-07-15},
  doi = {10.48550/arXiv.2207.07414},
  url = {https://arxiv.org/abs/2207.07414v2},
  urldate = {2022-07-29},
  abstract = {Space-based gravitational wave (GW) detectors will be able to observe signals from sources that are otherwise nearly impossible from current ground-based detection. Consequently, the well established signal detection method, matched filtering, will require a complex template bank, leading to a computational cost that is too expensive in practice. Here, we develop a high-accuracy GW signal detection and extraction method for all space-based GW sources. As a proof of concept, we show that a science-driven and uniform multi-stage deep neural network can identify synthetic signals that are submerged in Gaussian noise. Our method has more than 99\% accuracy for signal detection of various sources while obtaining at least 95\% similarity compared with target signals. We further demonstrate the interpretability and strong generalization behavior for several extended scenarios.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/LVWT4DHQ/Zhao et al. - 2022 - Space-based gravitational wave signal detection an.pdf;/Users/Nasy/Zotero/storage/JXWVBGZ2/2207.html}
}

@article{zhaoSpacebasedGravitationalWave2022b,
  title = {Space-Based Gravitational Wave Signal Detection and Extraction with Deep Neural Network},
  author = {Zhao, Tianyu and Lyu, Ruoxi and Ren, Zhixiang and Wang, He and Cao, Zhoujian},
  date = {2022-07-15},
  doi = {10.48550/arXiv.2207.07414},
  url = {https://arxiv.org/abs/2207.07414v2},
  urldate = {2022-07-29},
  abstract = {Space-based gravitational wave (GW) detectors will be able to observe signals from sources that are otherwise nearly impossible from current ground-based detection. Consequently, the well established signal detection method, matched filtering, will require a complex template bank, leading to a computational cost that is too expensive in practice. Here, we develop a high-accuracy GW signal detection and extraction method for all space-based GW sources. As a proof of concept, we show that a science-driven and uniform multi-stage deep neural network can identify synthetic signals that are submerged in Gaussian noise. Our method has more than 99\% accuracy for signal detection of various sources while obtaining at least 95\% similarity compared with target signals. We further demonstrate the interpretability and strong generalization behavior for several extended scenarios.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/RG7D9DZG/Zhao et al. - 2022 - Space-based gravitational wave signal detection an.pdf;/Users/Nasy/Zotero/storage/2H6YW563/2207.html}
}

@article{zhengTransformingDomainIntegrals1992,
  title = {Transforming the Domain Integrals to the Boundary Using Approximate Particular Solutions: A Boundary Element Approach for Nonlinear Problems},
  shorttitle = {Transforming the Domain Integrals to the Boundary Using Approximate Particular Solutions},
  author = {Zheng, R. and Phan-Thien, N.},
  date = {1992-10-01},
  journaltitle = {Applied Numerical Mathematics},
  volume = {10},
  number = {5},
  pages = {435--445},
  issn = {0168-9274},
  doi = {10.1016/0168-9274(92)90061-H},
  url = {https://www.sciencedirect.com/science/article/pii/016892749290061H},
  urldate = {2022-10-27},
  abstract = {A boundary element method is developed for solving nonlinear problems. This method transforms the domain integrals onto the boundary with the use of an approximate particular solution obtained by expressing the nonlinearities in terms of a linear combination of radial basis functions. Numerical examples show that the method is efficient and can produce accurate results.},
  langid = {english},
  keywords = {Boundary element method,nonlinear problems,particular solution},
  annotation = {12 citations (Crossref) [2022-11-04]},
  file = {/Users/Nasy/Zotero/storage/BP9Y3IR9/016892749290061H.html}
}

@article{zhongRootalignedSMILESTight2022,
  title = {Root-Aligned {{SMILES}}: A Tight Representation for Chemical Reaction Prediction},
  shorttitle = {Root-Aligned {{SMILES}}},
  author = {Zhong, Zipeng and Song, Jie and Feng, Zunlei and Liu, Tiantao and Jia, Lingxiang and Yao, Shaolun and Wu, Min and Hou, Tingjun and Song, Mingli},
  date = {2022-08-10},
  journaltitle = {Chem. Sci.},
  volume = {13},
  number = {31},
  pages = {9023--9034},
  publisher = {The Royal Society of Chemistry},
  issn = {2041-6539},
  doi = {10.1039/D2SC02763A},
  url = {https://pubs.rsc.org/en/content/articlelanding/2022/sc/d2sc02763a},
  urldate = {2024-07-11},
  abstract = {Chemical reaction prediction, involving forward synthesis and retrosynthesis prediction, is a fundamental problem in organic synthesis. A popular computational paradigm formulates synthesis prediction as a sequence-to-sequence translation problem, where the typical SMILES is adopted for molecule representations. However, the general-purpose SMILES neglects the characteristics of chemical reactions, where the molecular graph topology is largely unaltered from reactants to products, resulting in the suboptimal performance of SMILES if straightforwardly applied. In this article, we propose the root-aligned SMILES (R-SMILES), which specifies a tightly aligned one-to-one mapping between the product and the reactant SMILES for more efficient synthesis prediction. Due to the strict one-to-one mapping and reduced edit distance, the computational model is largely relieved from learning the complex syntax and dedicated to learning the chemical knowledge for reactions. We compare the proposed R-SMILES with various state-of-the-art baselines and show that it significantly outperforms them all, demonstrating the superiority of the proposed method.},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/FRJ35P52/Zhong et al. - 2022 - Root-aligned SMILES a tight representation for chemical reaction prediction.pdf;/Users/Nasy/Zotero/storage/G6TGIBRZ/Zhong et al. - 2022 - Root-aligned SMILES a tight representation for chemical reaction prediction.pdf}
}

@inproceedings{zhouLargeLanguageModels2023,
  title = {Large {{Language Models}} Are {{Human-Level Prompt Engineers}}},
  author = {Zhou, Yongchao and Muresanu, Andrei Ioan and Han, Ziwen and Paster, Keiran and Pitis, Silviu and Chan, Harris and Ba, Jimmy},
  date = {2023-02-01},
  url = {https://openreview.net/forum?id=92gvk82DE-},
  abstract = {By conditioning on natural language instructions, large language models (LLMs) have displayed impressive capabilities as general-purpose computers. However, task performance depends significantly on the quality of the prompt used to steer the model, and most effective prompts have been handcrafted by humans. Inspired by classical program synthesis and the human approach to prompt engineering, we propose Automatic Prompt Engineer (APE) for automatic instruction generation and selection. In our method, we treat the instruction as the "program," optimized by searching over a pool of instruction candidates proposed by an LLM in order to maximize a chosen score function. To evaluate the quality of the selected instruction, we evaluate the zero-shot performance of another LLM following the selected instruction. Experiments on 24 NLP tasks show that our automatically generated instructions outperform the prior LLM baseline by a large margin and achieve better or comparable performance to the instructions generated by human annotators on 21/24 tasks. We conduct extensive qualitative and quantitative analyses to explore the performance of APE. We show that APE-engineered prompts can be applied to steer models toward truthfulness and/or informativeness, as well as to improve few-shot learning performance by simply prepending them to standard in-context learning prompts.},
  eventtitle = {The {{Eleventh International Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/C8YCNWV5/Zhou et al. - 2023 - Large Language Models are Human-Level Prompt Engin.pdf}
}

@inproceedings{zhouLeasttoMostPromptingEnables2023,
  title = {Least-to-{{Most Prompting Enables Complex Reasoning}} in {{Large Language Models}}},
  author = {Zhou, Denny and Schärli, Nathanael and Hou, Le and Wei, Jason and Scales, Nathan and Wang, Xuezhi and Schuurmans, Dale and Cui, Claire and Bousquet, Olivier and Le, Quoc V. and Chi, Ed H.},
  date = {2023-02-01},
  url = {https://openreview.net/forum?id=WZH7099tgfM},
  abstract = {Chain-of-thought prompting has demonstrated remarkable performance on various natural language reasoning tasks. However, it tends to perform poorly on tasks which requires solving problems harder than the exemplars shown in the prompts. To overcome this challenge of easy-to-hard generalization, we propose a novel prompting strategy, least-to-most prompting. The key idea in this strategy is to break down a complex problem into a series of simpler subproblems and then solve them in sequence. Solving each subproblem is facilitated by the answers to previously solved subproblems. Our experimental results on tasks related to symbolic manipulation, compositional generalization, and math reasoning reveal that least-to-most prompting is capable of generalizing to more difficult problems than those seen in the prompts. A notable finding is that when the GPT-3 code-davinci-002 model is used with least-to-most prompting, it can solve the compositional generalization benchmark SCAN in any split (including length split) with an accuracy of at least 99\textbackslash\% using just 14 exemplars, compared to only 16\textbackslash\% accuracy with chain-of-thought prompting. This is particularly noteworthy because neural-symbolic models in the literature that specialize in solving SCAN are trained on the entire training set containing over 15,000 examples. We have included prompts for all the tasks in the Appendix.},
  eventtitle = {The {{Eleventh International Conference}} on {{Learning Representations}}},
  langid = {english},
  file = {/Users/Nasy/Zotero/storage/UDIWNVD4/Zhou et al. - 2023 - Least-to-Most Prompting Enables Complex Reasoning .pdf}
}

@inproceedings{zhuHGCNHeterogeneousGraph2020,
  title = {{{HGCN}}: {{A Heterogeneous Graph Convolutional Network-Based Deep Learning Model Toward Collective Classification}}},
  shorttitle = {{{HGCN}}},
  booktitle = {Proceedings of the 26th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
  author = {Zhu, Zhihua and Fan, Xinxin and Chu, Xiaokai and Bi, Jingping},
  date = {2020-08-23},
  series = {{{KDD}} '20},
  pages = {1161--1171},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  doi = {10.1145/3394486.3403169},
  url = {https://doi.org/10.1145/3394486.3403169},
  urldate = {2022-05-30},
  abstract = {Collective classification, as an important technique to study networked data, aims to exploit the label autocorrelation for a group of inter-connected entities with complex dependencies. As the emergence of various heterogeneous information networks (HINs), collective classification at present is confronting several severe challenges stemming from the heterogeneity of HINs, such as complex relational hierarchy, potential incompatible semantics and node-context relational semantics. To address the challenges, in this paper, we propose a novel heterogeneous graph convolutional network-based deep learning model, called HGCN, to collectively categorize the entities in HINs. Our work involves three primary contributions: i) HGCN not only learns the latent relations from the relation-sophisticated HINs via multi-layer heterogeneous convolutions, but also captures the semantic incompatibility among relations with properly-learned edge-level filter parameters; ii) to preserve the fine-grained relational semantics of different-type nodes, we propose a heterogeneous graph convolution to directly tackle the original HINs without any in advance transforming the network from heterogeneity to homogeneity; iii) we perform extensive experiments using four real-world datasets to validate our proposed HGCN, the multi-facet results show that our proposed HGCN can significantly improve the performance of collective classification compared with the state-of-the-art baseline methods.},
  isbn = {978-1-4503-7998-4},
  keywords = {collective classification,heterogeneous graph convolutional network,heterogeneous information network,relational learning},
  annotation = {6 citations (Crossref) [2022-08-03]},
  file = {/Users/Nasy/Zotero/storage/4NLDNCI6/Zhu et al. - 2020 - HGCN A Heterogeneous Graph Convolutional Network-.pdf}
}

@online{zhuMiniGPT4EnhancingVisionLanguage2023,
  title = {{{MiniGPT-4}}: {{Enhancing Vision-Language Understanding}} with {{Advanced Large Language Models}}},
  shorttitle = {{{MiniGPT-4}}},
  author = {Zhu, Deyao and Chen, Jun and Shen, Xiaoqian and Li, Xiang and Elhoseiny, Mohamed},
  date = {2023-04-20},
  eprint = {2304.10592},
  eprinttype = {arXiv},
  eprintclass = {cs},
  doi = {10.48550/arXiv.2304.10592},
  url = {http://arxiv.org/abs/2304.10592},
  urldate = {2023-07-16},
  abstract = {The recent GPT-4 has demonstrated extraordinary multi-modal abilities, such as directly generating websites from handwritten text and identifying humorous elements within images. These features are rarely observed in previous vision-language models. We believe the primary reason for GPT-4's advanced multi-modal generation capabilities lies in the utilization of a more advanced large language model (LLM). To examine this phenomenon, we present MiniGPT-4, which aligns a frozen visual encoder with a frozen LLM, Vicuna, using just one projection layer. Our findings reveal that MiniGPT-4 possesses many capabilities similar to those exhibited by GPT-4 like detailed image description generation and website creation from hand-written drafts. Furthermore, we also observe other emerging capabilities in MiniGPT-4, including writing stories and poems inspired by given images, providing solutions to problems shown in images, teaching users how to cook based on food photos, etc. In our experiment, we found that only performing the pretraining on raw image-text pairs could produce unnatural language outputs that lack coherency including repetition and fragmented sentences. To address this problem, we curate a high-quality, well-aligned dataset in the second stage to finetune our model using a conversational template. This step proved crucial for augmenting the model's generation reliability and overall usability. Notably, our model is highly computationally efficient, as we only train a projection layer utilizing approximately 5 million aligned image-text pairs. Our code, pre-trained model, and collected dataset are available at https://minigpt-4.github.io/.},
  pubstate = {prepublished},
  keywords = {Computer Science - Computer Vision and Pattern Recognition},
  file = {/Users/Nasy/Zotero/storage/7MQUUR8E/Zhu et al. - 2023 - MiniGPT-4 Enhancing Vision-Language Understanding.pdf;/Users/Nasy/Zotero/storage/22UPFI46/2304.html}
}

@online{ZINCCatalogSigma,
  title = {{{ZINC Catalog Sigma Aldrich}} ({{Building Blocks}}) | {{ZINC Is Not Commercial}} - {{A}} Database of Commercially-Available Compounds},
  url = {http://zinc12.docking.org/catalogs/sialbb},
  urldate = {2024-07-11}
}

@online{ZoteroSettingsFeeds,
  title = {Zotero | {{Settings}} {$>$} {{Feeds}}/{{API}} {$>$} {{New Key}}},
  url = {https://www.zotero.org/settings/keys/new},
  urldate = {2022-03-11},
  keywords = {nosource}
}