Add phonon workflow

nomad-coe · Feb 12, 2025 · 175b7af · 175b7af · github-actions · Feb 12, 2025
1 parent 3e46ccc
commit 175b7af
Show file tree

Hide file tree

Showing 5 changed files with 280 additions and 125 deletions.
diff --git a/src/nomad_simulations/schema_packages/workflow/__init__.py b/src/nomad_simulations/schema_packages/workflow/__init__.py
@@ -2,5 +2,6 @@
 from .geometry_optimization import GeometryOptimization
 from .gw import DFTGWWorkflow
 from .molecular_dynamics import MolecularDynamics
+from .phonon import Phonon
 from .single_point import SinglePoint
 from .thermodynamics import Thermodynamics
diff --git a/src/nomad_simulations/schema_packages/workflow/general.py b/src/nomad_simulations/schema_packages/workflow/general.py
@@ -13,6 +13,10 @@
 m_package = SchemaPackage()
 
 
+class SimulationTask(Task):
+    pass
+
+
 class SimulationWorkflowModel(ArchiveSection):
     """
     Base class for simulation workflow model sub-section definition.
@@ -66,9 +70,16 @@ def normalize(self, archive: EntryArchive, logger: BoundLogger) -> None:
             self.final_outputs = archive.data.outputs[-1]
 
 
-class SimulationWorkflow(Workflow):
+class SimulationTaskReference(TaskReference, SimulationTask):
+    pass
+
+
+class SimulationWorkflow(Workflow, SimulationTask):
     """
     Base class for simulation workflows.
+
+    It contains sub-sections model and results which are included in inputs and
+    outputs, respectively.
     """
 
     task_label = 'Task'
@@ -78,126 +89,56 @@ class SimulationWorkflow(Workflow):
     results = SubSection(sub_section=SimulationWorkflowResults.m_def)
 
     def map_inputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
-        if not self.model:
-            self.model = SimulationWorkflowModel()
-
-        self.model.normalize(archive, logger)
-
-        # set method as inputs
-        self.inputs.append(Link(name=self.model.label, section=self.model))
+        if self.model:
+            self.model.normalize(archive, logger)
+            # add method to inputs
+            self.inputs.append(Link(name=self.model.label, section=self.model))
 
     def map_outputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
-        if not self.results:
-            self.results = SimulationWorkflowResults()
-
-        self.results.normalize(archive, logger)
-
-        # set results as outputs
-        self.outputs.append(Link(name=self.results.label, section=self.results))
+        if self.results:
+            self.results.normalize(archive, logger)
+            # add results to outputs
+            self.outputs.append(Link(name=self.results.label, section=self.results))
 
     def map_tasks(self, archive: EntryArchive, logger: BoundLogger) -> None:
         """
         Generate tasks from archive data outputs. Tasks are ordered and linked based
         on the execution time of the calculation corresponding to the output.
-        By default, the tasks follow the order of the outputs and are linked sequentially.
         """
         if not archive.data or not archive.data.outputs:
             return
 
-        # default should to serial execution
-        times: list[tuple[float, float]] = list(
-            [
-                (
-                    o.wall_start.magnitude if o.wall_start else n,
-                    o.wall_end.magnitude if o.wall_end else n,
+        tasks = []
+        for outputs in archive.data.outputs:
+            tasks.append(
+                SimulationTask(
+                    outputs=[
+                        Link(
+                            name='Outputs',
+                            section=outputs,
+                        )
+                    ]
                 )
-                for n, o in enumerate(archive.data.outputs)
-            ]
-        )
-        times.sort(key=lambda x: x[0])
-        # current index of parent
-        parent_n = 0
-        for n, time in enumerate(times):
-            task = Task(
-                outputs=[
-                    Link(
-                        name='Outputs',
-                        section=archive.data.outputs[n],
-                    )
-                ],
             )
-            self.tasks.append(task)
-            # link tasks based on overlap in execution time
-            if time[0] >= times[parent_n][1]:
-                # assign as new parent
-                parent_n = n
-                # reset outputs
-                self._grouped_tasks.append([n])
-            else:
-                if not self._grouped_tasks:
-                    self._grouped_tasks.append([])
-                self._grouped_tasks[-1].append(n)
+
+        self.tasks.extend(tasks)
 
     def normalize(self, archive: EntryArchive, logger: BoundLogger):
+        """
+        Set a default name and map inputs, outputs and tasks if they are not set.
+        """
+        if not self.name:
+            self.name = self.m_def.name
+
         if not self.inputs:
             self.map_inputs(archive, logger)
 
         if not self.outputs:
             self.map_outputs(archive, logger)
 
-        # group tasks in parallel
-        # assume serial workflow
-        self._grouped_tasks = [[n] for n in range(len(self.tasks))]
-
         if not self.tasks:
             self.map_tasks(archive, logger)
 
-        # add task inputs/outputs to reference
-        # TODO do this in TaskReference normalizer
-        for task in self.tasks:
-            if isinstance(task, TaskReference):
-                task.inputs.extend(task.task.inputs)
-                task.outputs.extend(task.task.outputs)
-
-        # link successive task groups, first group adds workflow inputs
-        for tasks_n, grouped_tasks in enumerate(self._grouped_tasks):
-            # assign outputs of previous tasks an input to next tasks
-            if tasks_n:
-                inputs = [
-                    inp
-                    for task in [
-                        self.tasks[n] for n in self._grouped_tasks[tasks_n - 1]
-                    ]
-                    for inp in task.outputs
-                ]
-            else:
-                inputs = self.inputs
-            for n in grouped_tasks:
-                self.tasks[n].inputs.extend(inputs)
-                if isinstance(self.tasks[n], TaskReference):
-                    self.tasks[n].task.inputs.extend(inputs)
-
-        if self._grouped_tasks:
-            # add inputs of first group to workflow inputs
-            self.inputs.extend(
-                [
-                    inp
-                    for task in [self.tasks[n] for n in self._grouped_tasks[0]]
-                    for inp in task.inputs
-                    if inp not in self.inputs
-                ]
-            )
-
-            # add outputs of last group to workflow outputs
-            self.outputs.extend(
-                [
-                    out
-                    for task in [self.tasks[n] for n in self._grouped_tasks[-1]]
-                    for out in task.outputs
-                    if out not in self.outputs
-                ]
-            )
-
 
 class SerialWorkflow(SimulationWorkflow):
     """
@@ -210,6 +151,35 @@ def map_tasks(self, archive: EntryArchive, logger: BoundLogger) -> None:
             if not task.name:
                 task.name = f'{self.task_label} {n}'
 
+    def normalize(self, archive: EntryArchive, logger: BoundLogger) -> None:
+        super().normalize(archive, logger)
+
+        if not self.tasks:
+            logger.error(INCORRECT_N_TASKS)
+            return
+
+        # link tasks sequentially
+        for n, task in enumerate(self.tasks):
+            if n == 0:
+                inputs = self.inputs
+            else:
+                previous_task = self.tasks[n - 1]
+                inputs = [
+                    Link(
+                        name='Linked task',
+                        section=previous_task.task
+                        if isinstance(previous_task, TaskReference)
+                        else previous_task,
+                    )
+                ]
+
+            task.inputs.extend([inp for inp in inputs if inp not in task.inputs])
+
+        # add outputs of last task to outputs
+        self.outputs.extend(
+            [out for out in self.tasks[-1].outputs if out not in self.outputs]
+        )
+
 
 class ElectronicStructureResults(SimulationWorkflowResults):
     """

diff --git a/src/nomad_simulations/schema_packages/workflow/geometry_optimization.py b/src/nomad_simulations/schema_packages/workflow/geometry_optimization.py
@@ -196,7 +196,7 @@ def map_inputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
             self.model = GeometryOptimizationModel()
         super().map_inputs(archive, logger)
 
-    def map_outputs(self, archive: EntryArchive, logger: BoundLogger):
+    def map_outputs(self, archive: EntryArchive, logger: BoundLogger) -> None:
         if not self.results:
             self.results = GeometryOptimizationResults()
         super().map_outputs(archive, logger)
File	Stmts	Miss	Cover	Missing
src/nomad_simulations
__init__.py	4	2	50%	3–4
_version.py	11	2	82%	5–6
src/nomad_simulations/schema_packages
__init__.py	15	2	87%	39–41
atoms_state.py	190	21	89%	13–15, 201–204, 228, 283–284, 352–353, 355, 537, 549–550, 611–615, 630–634, 641
basis_set.py	240	28	88%	8–9, 122–133, 172–185, 208, 391–395, 417–418, 462–465, 584, 615, 617
general.py	85	8	91%	4–7, 123, 143, 253–254, 264
model_method.py	269	78	71%	10–12, 171–174, 177–184, 276–277, 297, 318–339, 355–381, 384–401, 587, 780, 791, 833–840, 878, 897, 977, 1034, 1109, 1223
model_system.py	348	37	89%	45–51, 235, 254, 258, 261, 264, 290, 376–377, 454–455, 472–473, 686–689, 736–743, 917–918, 1140–1144, 1150–1151, 1159–1160, 1165, 1188
numerical_settings.py	259	61	76%	12–14, 217, 219–220, 223–226, 230–231, 238–241, 250–253, 257–260, 262–265, 270–273, 279–282, 469–496, 571, 606–609, 633, 636, 681, 683–686, 690, 694, 741, 745–766, 821–822, 889
outputs.py	120	10	92%	8–9, 253–256, 296–299, 324, 326, 363, 382
physical_property.py	102	7	93%	20–22, 202, 331–333
variables.py	86	12	86%	8–10, 98, 121, 145, 167, 189, 211, 233, 256, 276
src/nomad_simulations/schema_packages/properties
band_gap.py	51	5	90%	8–10, 135–136
band_structure.py	123	25	80%	9–11, 232–265, 278, 285, 321–322, 325, 372–373, 378
energies.py	42	9	79%	7–9, 36, 57, 82, 103, 119, 134
fermi_surface.py	17	4	76%	7–9, 40
forces.py	22	6	73%	7–9, 36, 56, 79
greens_function.py	99	13	87%	7–9, 210–211, 214, 235–236, 239, 260–261, 264, 400
hopping_matrix.py	29	5	83%	7–9, 58, 94
permittivity.py	48	8	83%	7–9, 97–105
spectral_profile.py	260	128	51%	9–11, 57–60, 95–98, 199–300, 356–368, 393–396, 416, 421–424, 466–502, 526, 573–576, 592–593, 598–604
thermodynamics.py	75	27	64%	7–9, 35, 56, 72, 81, 90, 101, 110, 137, 147, 157, 172–174, 177, 193, 213–215, 218, 234, 254–256, 259
src/nomad_simulations/schema_packages/utils
utils.py	79	16	80%	8–11, 65–74, 83–84, 89, 92, 169–170
src/nomad_simulations/schema_packages/workflow
__init__.py	7	7	0%	1–7
general.py	82	82	0%	1–197
geometry_optimization.py	53	53	0%	1–205
gw.py	28	28	0%	1–61
molecular_dynamics.py	27	27	0%	1–145
phonon.py	47	47	0%	1–186
single_point.py	28	28	0%	1–70
thermodynamics.py	23	23	0%	1–63
TOTAL	2890	809	72%