Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vasp mapping parser #3

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ where = ["src"]
[project.entry-points.'nomad.plugin']
exciting_parser_entry_point = "nomad_simulation_parsers.parsers:exciting_parser_entry_point"
exciting_schema_package_entry_point = "nomad_simulation_parsers.schema_packages:exciting_schema_package_entry_point"
vasp_parser_entry_point = "nomad_simulation_parsers.parsers:vasp_parser_entry_point"
vasp_schema_package_entry_point = "nomad_simulation_parsers.schema_packages:vasp_schema_package_entry_point"



Expand Down
28 changes: 27 additions & 1 deletion src/nomad_simulation_parsers/parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
from nomad.config.models.plugins import ParserEntryPoint
from pydantic import Field


class EntryPoint(ParserEntryPoint):
    """Parser entry point configured with the parser class it should load.

    Each entry point instance names its own parser implementation through
    `parser_class_name` instead of relying on a class name hard-coded in
    `load`.
    """

    parser_class_name: str = Field(
        description="""
        The fully qualified name of the Python class that implements the parser.
        This class must have a function `def parse(self, mainfile, archive, logger)`.
    """
    )

    def load(self):
        """Build the matching parser interface for this entry point.

        Returns:
            A `MatchingParserInterface` constructed from the fields of this
            entry point.
        """
        # Imported lazily, as in the original, so that importing this module
        # does not pull in the parsing machinery.
        from nomad.parsing.parser import MatchingParserInterface

        # `parser_class_name` is a declared field, so `self.dict()` already
        # carries it. Passing it a second time (positionally or by keyword)
        # next to `**self.dict()` would hand the same argument over twice.
        return MatchingParserInterface(**self.dict())

Expand All @@ -15,9 +23,27 @@ def load(self):
name='parsers/exciting',
aliases=['parsers/exciting'],
description='NOMAD parser for EXCITING.',
parser_class_name='nomad_simulation_parsers.parsers.exciting.parser.ExcitingParser',
python_package='nomad_simulation_parsers',
mainfile_contents_re=r'EXCITING.*started[\s\S]+?All units are atomic ',
mainfile_name_re=r'^.*.OUT(\.[^/]*)?$',
code_name='exciting',
code_homepage='http://exciting-code.org/',
)

vasp_parser_entry_point = EntryPoint(
name='parsers/vasp',
description='Parser for VASP XML and OUTCAR outputs',
parser_class_name='nomad_simulation_parsers.parsers.vasp.parser.VASPParser',
python_package='nomad_simulation_parsers',
code_name='VASP',
mainfile_contents_re=(
r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*?\s*<modeling>?\s*'
r'<generator>?\s*<i name="program" type="string">\s*vasp\s*</i>?|'
r'^\svasp[\.\d]+.+?(?:\(build|complex)[\s\S]+?executed on'
),
Comment on lines +40 to +44
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put the HDF5 parser above xml in the matching priority

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

which hdf5 parser?

mainfile_mime_re='(application/.*)|(text/.*)',
mainfile_name_re='.*[^/]*xml[^/]*',
mainfile_alternative=True,
supported_compressions=['gz', 'bz2', 'xz'],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the compression support be specified at a higher level?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is in the correct place, in MatchingParser

)
22 changes: 12 additions & 10 deletions src/nomad_simulation_parsers/parsers/exciting/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
from nomad.units import ureg
from nomad_simulations.schema_packages.general import Simulation

import nomad_simulation_parsers.schema_packages.exciting # noqa
from nomad_simulation_parsers.parsers.utils import search_files
from nomad_simulation_parsers.parsers.utils.general import search_files, remove_mapping_annotations

from .eigval_reader import EigvalReader
from .info_reader import InfoReader
Expand Down Expand Up @@ -143,6 +142,8 @@ class ExcitingParser(Parser):
def parse(
self, mainfile: str, archive: 'EntryArchive', logger: 'BoundLogger'
) -> None:
from nomad_simulation_parsers.schema_packages import exciting

maindir = os.path.dirname(mainfile)
mainbase = os.path.basename(mainfile)

Expand All @@ -165,6 +166,7 @@ def parse(
input_xml_parser = InputXMLParser(filepath=input_xml_files[0])
data_parser.annotation_key = 'input_xml'
input_xml_parser.convert(data_parser)
input_xml_parser.close()

# eigenvalues from eigval.out
eigval_files = search_files('EIGVAL.OUT', maindir, mainbase)
Expand All @@ -174,7 +176,7 @@ def parse(
)
data_parser.annotation_key = 'eigval'
eigval_parser.convert(data_parser, update_mode='merge@-1')
self.eigval_parser = eigval_parser
eigval_parser.close()

# bandstructure from bandstructure.xml
bandstructure_files = search_files('bandstructure.xml', maindir, mainbase)
Expand All @@ -185,21 +187,21 @@ def parse(
# TODO set n_spin from info
data_parser.annotation_key = 'bandstructure_xml'
bandstructure_parser.convert(data_parser, update_mode='merge@-1')
self.bandstructure_parser = bandstructure_parser
bandstructure_parser.close()

# dos from dos.xml
dos_files = search_files('dos.xml', maindir, mainbase)
if dos_files:
dos_parser = DosXMLParser(filepath=dos_files[0])
data_parser.annotation_key = 'dos_xml'
dos_parser.convert(data_parser, update_mode='merge@-1')
self.dos_parser = dos_parser
dos_parser.close()

archive.data = data_parser.data_object

self.info_parser = info_parser
# close parsers
# info_parser.close()
# input_xml_parser.close()
# eigval_parser.close()
# data_parser.close()
info_parser.close()
data_parser.close()

# remove annotations
remove_mapping_annotations(exciting.general.Simulation.m_def)
32 changes: 0 additions & 32 deletions src/nomad_simulation_parsers/parsers/parser.py

This file was deleted.

39 changes: 0 additions & 39 deletions src/nomad_simulation_parsers/parsers/utils.py

This file was deleted.

82 changes: 82 additions & 0 deletions src/nomad_simulation_parsers/parsers/utils/general.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os
import re
from glob import glob
from typing import Union

from nomad.metainfo import Section, SubSection


def search_files(
    pattern: str,
    basedir: str,
    deep: bool = True,
    max_dirs: int = 10,
    re_pattern: str = '',
) -> list[str]:
    """Search files following the `pattern` starting from `basedir`.

    The search first looks in `basedir` itself and then moves one directory
    level per attempt, either down into sub-folders (deep=True) or up into
    parent folders (deep=False), stopping at the first level that yields a
    match. If more than one file is found, a further regex search with
    `re_pattern` is done to narrow down the matching files.

    Args:
        pattern (str): pattern to match the files in the folder
        basedir (str): directory to start the search
        deep (bool, optional): folders search direction (True=down, False=up)
        max_dirs (int, optional): maximum number of directory levels to try,
            counting `basedir` itself; a value of zero or less returns no files
        re_pattern (str, optional): additional regex pattern to filter matching files

    Returns:
        list: list of matching files
    """
    # Start with an empty result so that `max_dirs <= 0` (loop body never
    # runs) returns an empty list instead of raising NameError.
    filenames: list[str] = []

    # `glob` is called without `recursive=True`, so each extra '**' segment
    # stands for exactly one directory level, like '*'.
    step = '**' if deep else '..'
    for _ in range(max_dirs):
        filenames = glob(f'{basedir}/{pattern}')
        if filenames:
            break
        pattern = os.path.join(step, pattern)

    if len(filenames) > 1:
        # Prefer the files that match the regex, but keep all candidates if
        # the filter would discard every one of them.
        matches = [f for f in filenames if re.search(re_pattern, f)]
        filenames = matches or filenames

    return [f for f in filenames if os.access(f, os.F_OK)]


def remove_mapping_annotations(property: Section, max_depth: int = 5) -> None:
    """
    Strip the 'mapping' annotations from a section definition, from all of its
    quantities and, recursively, from its annotated sub-sections.

    Args:
        property (Section): The section definition to remove the annotations from.
        max_depth (int, optional): The maximum depth of the recursion for sub-sections
            using the same section as parent.
    """
    key = 'mapping'

    def _strip(definition: Union[Section, SubSection], level: int = 0):
        # Depth guard: stop descending once the recursion limit is exceeded.
        if level > max_depth:
            return

        definition.m_annotations.pop(key, None)

        # A sub-section definition wraps the section holding the quantities.
        if isinstance(definition, SubSection):
            section = definition.sub_section
        else:
            section = definition

        for quantity in section.all_quantities.values():
            quantity.m_annotations.pop(key, None)

        next_level = level + 1
        for child in section.all_sub_sections.values():
            # Annotation on the sub-section definition itself.
            if child.m_annotations.get(key):
                _strip(child, next_level)
                continue

            # Annotation on the section the sub-section points to.
            if child.sub_section.m_annotations.get(key):
                _strip(child.sub_section, next_level)
                continue

            # Otherwise look for annotated sections inheriting from it.
            for heir in child.sub_section.all_inheriting_sections:
                if heir.m_annotations.get(key):
                    _strip(heir, next_level)

    _strip(property)
Loading