FAIRmat-NFDI · ladinesa · Jan 20, 2025 · Jan 20, 2025 · Jan 20, 2025 · Jan 21, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -122,6 +122,8 @@ where = ["src"]
 [project.entry-points.'nomad.plugin']
 exciting_parser_entry_point = "nomad_simulation_parsers.parsers:exciting_parser_entry_point"
 exciting_schema_package_entry_point = "nomad_simulation_parsers.schema_packages:exciting_schema_package_entry_point"
+vasp_parser_entry_point = "nomad_simulation_parsers.parsers:vasp_parser_entry_point"
+vasp_schema_package_entry_point = "nomad_simulation_parsers.schema_packages:vasp_schema_package_entry_point"
 
 
 

diff --git a/src/nomad_simulation_parsers/parsers/__init__.py b/src/nomad_simulation_parsers/parsers/__init__.py
@@ -1,12 +1,20 @@
 from nomad.config.models.plugins import ParserEntryPoint
+from pydantic import Field
 
 
 class EntryPoint(ParserEntryPoint):
+    parser_class_name: str = Field(
+        description="""
+        The fully qualified name of the Python class that implements the parser.
+        This class must have a function `def parse(self, mainfile, archive, logger)`.
+    """
+    )
+
     def load(self):
         from nomad.parsing.parser import MatchingParserInterface
 
         return MatchingParserInterface(
-            parser_class_name='nomad_simulation_parsers.parsers.exciting.parser.ExcitingParser',
+            # parser_class_name is a model field, so self.dict() already passes it
             **self.dict(),
         )
 
@@ -15,9 +23,27 @@ def load(self):
     name='parsers/exciting',
     aliases=['parsers/exciting'],
     description='NOMAD parser for EXCITING.',
+    parser_class_name='nomad_simulation_parsers.parsers.exciting.parser.ExcitingParser',
     python_package='nomad_simulation_parsers',
     mainfile_contents_re=r'EXCITING.*started[\s\S]+?All units are atomic ',
     mainfile_name_re=r'^.*.OUT(\.[^/]*)?$',
     code_name='exciting',
     code_homepage='http://exciting-code.org/',
 )
+
+vasp_parser_entry_point = EntryPoint(
+    name='parsers/vasp',
+    code_name='VASP',
+    description='Parser for VASP XML and OUTCAR outputs',
+    python_package='nomad_simulation_parsers',
+    parser_class_name='nomad_simulation_parsers.parsers.vasp.parser.VASPParser',
+    supported_compressions=['gz', 'bz2', 'xz'],
+    mainfile_alternative=True,
+    mainfile_name_re=r'.*[^/]*xml[^/]*',
+    mainfile_mime_re=r'(application/.*)|(text/.*)',
+    mainfile_contents_re=(  # alternative 1: XML header, alternative 2: text banner
+        r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*?\s*<modeling>?\s*'
+        r'<generator>?\s*<i name="program" type="string">\s*vasp\s*</i>?|'
+        r'^\svasp[\.\d]+.+?(?:\(build|complex)[\s\S]+?executed on'
+    ),
+)
diff --git a/src/nomad_simulation_parsers/parsers/exciting/parser.py b/src/nomad_simulation_parsers/parsers/exciting/parser.py
@@ -20,8 +20,7 @@
 from nomad.units import ureg
 from nomad_simulations.schema_packages.general import Simulation
 
-import nomad_simulation_parsers.schema_packages.exciting  # noqa
-from nomad_simulation_parsers.parsers.utils import search_files
+from nomad_simulation_parsers.parsers.utils.general import search_files, remove_mapping_annotations
 
 from .eigval_reader import EigvalReader
 from .info_reader import InfoReader
@@ -143,6 +142,8 @@ class ExcitingParser(Parser):
     def parse(
         self, mainfile: str, archive: 'EntryArchive', logger: 'BoundLogger'
     ) -> None:
+        from nomad_simulation_parsers.schema_packages import exciting
+
         maindir = os.path.dirname(mainfile)
         mainbase = os.path.basename(mainfile)
 
@@ -165,6 +166,7 @@ def parse(
             input_xml_parser = InputXMLParser(filepath=input_xml_files[0])
             data_parser.annotation_key = 'input_xml'
             input_xml_parser.convert(data_parser)
+            input_xml_parser.close()
 
         # eigenvalues from eigval.out
         eigval_files = search_files('EIGVAL.OUT', maindir, mainbase)
@@ -174,7 +176,7 @@ def parse(
             )
             data_parser.annotation_key = 'eigval'
             eigval_parser.convert(data_parser, update_mode='merge@-1')
-            self.eigval_parser = eigval_parser
+            eigval_parser.close()
 
         # bandstructure from bandstructure.xml
         bandstructure_files = search_files('bandstructure.xml', maindir, mainbase)
@@ -185,21 +187,21 @@ def parse(
             # TODO set n_spin from info
             data_parser.annotation_key = 'bandstructure_xml'
             bandstructure_parser.convert(data_parser, update_mode='merge@-1')
-            self.bandstructure_parser = bandstructure_parser
+            bandstructure_parser.close()
 
         # dos from dos.xml
         dos_files = search_files('dos.xml', maindir, mainbase)
         if dos_files:
             dos_parser = DosXMLParser(filepath=dos_files[0])
             data_parser.annotation_key = 'dos_xml'
             dos_parser.convert(data_parser, update_mode='merge@-1')
-            self.dos_parser = dos_parser
+            dos_parser.close()
 
         archive.data = data_parser.data_object
 
-        self.info_parser = info_parser
         # close parsers
-        # info_parser.close()
-        # input_xml_parser.close()
-        # eigval_parser.close()
-        # data_parser.close()
+        info_parser.close()
+        data_parser.close()
+
+        # remove annotations
+        remove_mapping_annotations(exciting.general.Simulation.m_def)
diff --git a/src/nomad_simulation_parsers/parsers/parser.py b/src/nomad_simulation_parsers/parsers/parser.py
diff --git a/src/nomad_simulation_parsers/parsers/utils.py b/src/nomad_simulation_parsers/parsers/utils.py
diff --git a/src/nomad_simulation_parsers/parsers/utils/general.py b/src/nomad_simulation_parsers/parsers/utils/general.py
@@ -0,0 +1,82 @@
+import os
+import re
+from glob import glob
+from typing import Union
+
+from nomad.metainfo import Section, SubSection
+
+
+def search_files(
+    pattern: str,
+    basedir: str,
+    deep: bool = True,
+    max_dirs: int = 10,
+    re_pattern: str = '',
+) -> list[str]:
+    """Search files matching `pattern`, one directory level at a time from `basedir`.
+
+    Each step moves one level down (deep=True) or up (deep=False) and the search
+    stops at the first level with matches. If several files match, `re_pattern`
+    narrows them down, unless it would discard all of them.
+
+    Args:
+        pattern (str): glob pattern to match the files in the folder
+        basedir (str): directory to start the search
+        deep (bool, optional): folders search direction (True=down, False=up)
+        max_dirs (int, optional): number of levels to try; none if not positive
+        re_pattern (str, optional): additional regex pattern to filter matching files
+
+    Returns:
+        list: list of matching files, empty if nothing is found
+    """
+    filenames: list[str] = []
+    for _ in range(max_dirs):
+        filenames = glob(f'{basedir}/{pattern}')
+        if filenames:
+            break
+        pattern = os.path.join('**' if deep else '..', pattern)
+
+    if len(filenames) > 1:
+        filenames = [f for f in filenames if re.search(re_pattern, f)] or filenames
+
+    return [f for f in filenames if os.access(f, os.F_OK)]
+
+
+def remove_mapping_annotations(property: Section, max_depth: int = 5) -> None:
+    """
+    Strip the mapping annotations from a section definition, from its quantities
+    and, recursively, from its annotated sub-sections.
+
+    Args:
+        property (Section): The section definition to remove the annotations from.
+        max_depth (int, optional): The maximum depth of the recursion for sub-sections
+            using the same section as parent.
+    """
+
+    key = 'mapping'
+
+    def _annotated(definition) -> bool:
+        return bool(definition.m_annotations.get(key))
+
+    def _strip(target: Union[Section, SubSection], level: int = 0) -> None:
+        if level > max_depth:
+            return
+
+        target.m_annotations.pop(key, None)
+        section = target.sub_section if isinstance(target, SubSection) else target
+        for quantity in section.all_quantities.values():
+            quantity.m_annotations.pop(key, None)
+
+        for child in section.all_sub_sections.values():
+            if _annotated(child):
+                _strip(child, level + 1)
+                continue
+            if _annotated(child.sub_section):
+                _strip(child.sub_section, level + 1)
+                continue
+            # fall back to the annotated sections inheriting from the child section
+            for heir in child.sub_section.all_inheriting_sections:
+                if _annotated(heir):
+                    _strip(heir, level + 1)
+
+    _strip(property)