janosh · janosh · Feb 4, 2025 · Feb 1, 2025 · Feb 4, 2025 · Feb 4, 2025
diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py
@@ -423,8 +423,8 @@ class Model(Files, base_dir=f"{ROOT}/models"):
     cgcnn_p = "cgcnn/cgcnn+p.yml"
 
     # DeepMD-DPA3 models
-    dpa3_v1_mptrj = "deepmd_dpa3/dpa3-v1-mptrj.yml"
-    dpa3_v1_openlam = "deepmd_dpa3/dpa3-v1-openlam.yml"
+    dpa3_v1_mptrj = "deepmd/dpa3-v1-mptrj.yml"
+    dpa3_v1_openlam = "deepmd/dpa3-v1-openlam.yml"
 
     # original M3GNet straight from publication, not re-trained
     m3gnet = "m3gnet/m3gnet.yml"
@@ -453,7 +453,7 @@ class Model(Files, base_dir=f"{ROOT}/models"):
     gnome = "gnome/gnome.yml"
 
     # MatterSim
-    mattersim = "mattersim/mattersim-v1.yml"
+    mattersim_v1_5m = "mattersim/mattersim-v1-5m.yml"
 
     # ORB
     orb = "orb/orb.yml"
@@ -530,17 +530,17 @@ def geo_opt_path(self) -> str | None:
         return f"{ROOT}/{rel_path}"
 
     @property
-    def phonons_path(self) -> str | None:
+    def kappa_103_path(self) -> str | None:
         """File path associated with the file URL if it exists, otherwise
         download the file first, then return the path.
         """
         phonons_metrics = self.metrics.get("phonons", {})
         if phonons_metrics in ("not available", "not applicable"):
             return None
-        rel_path = phonons_metrics.get("pred_file")
+        rel_path = phonons_metrics.get("kappa_103", {}).get("pred_file")
         if not rel_path:
             raise ValueError(
-                f"metrics.phonons.pred_file not found in {self.rel_path!r}"
+                f"metrics.phonons.kappa_103.pred_file not found in {self.rel_path!r}"
             )
         return f"{ROOT}/{rel_path}"
 

diff --git a/matbench_discovery/metrics/phonons.py b/matbench_discovery/metrics/phonons.py
@@ -53,9 +53,6 @@ def calc_kappa_metrics_from_dfs(
         - DFT_kappa_tot_avg: Reference DFT conductivity values
     """
     # Remove precomputed columns
-    cols_to_remove = [Key.srd, Key.sre, Key.srme, MbdKey.true_kappa_tot_avg]
-    df_pred = df_pred.drop(columns=cols_to_remove, errors="ignore")
-
     df_pred[MbdKey.kappa_tot_avg] = df_pred[MbdKey.kappa_tot_rta].map(
         calculate_kappa_avg
     )
@@ -102,7 +99,7 @@ def calculate_kappa_avg(kappa: np.ndarray) -> np.ndarray:
         any NaN values or if the calculation fails. For multiple temperatures,
         returns an array of averages.
     """
-    if np.any(np.isnan(kappa)):
+    if np.any(pd.isna(kappa)):
         return np.array([np.nan])
     try:
         return np.asarray(kappa)[..., :3].mean(axis=-1)
@@ -222,7 +219,7 @@ def calc_kappa_srme(kappas_pred: pd.Series, kappas_true: pd.Series) -> np.ndarra
                 f"Neither mode_kappa_tot_avg, mode_kappa_tot nor individual kappa\n"
                 f"components found in {label}, got\n{keys}"
             )
-        mode_kappa_tot_avgs[label] = kappas
+        mode_kappa_tot_avgs[label] = np.asarray(kappas)
 
     # calculating microscopic error for all temperatures
     microscopic_error = (

diff --git a/matbench_discovery/phonons/__init__.py b/matbench_discovery/phonons/__init__.py
@@ -1,8 +1,17 @@
 """This package contains phonon-related functionality."""
 
+from typing import Final
+
 import numpy as np
 import pandas as pd
 
+# q-point mesh (which phonon modes to sample) based on international space group number
+spg_num_q_mesh_map: Final[dict[int, tuple[int, int, int]]] = {
+    225: (19, 19, 19),  # rocksalt
+    186: (19, 19, 15),  # wurtzite
+    216: (19, 19, 19),  # zincblende
+}
+
 
 def check_imaginary_freqs(frequencies: np.ndarray, threshold: float = -0.01) -> bool:
     """Check if frequencies are imaginary.

diff --git a/matbench_discovery/phonons/thermal_conductivity.py b/matbench_discovery/phonons/thermal_conductivity.py
@@ -9,6 +9,7 @@
 """
 
 import warnings
+from collections.abc import Sequence
 from copy import deepcopy
 from typing import Any
 
@@ -108,17 +109,25 @@ def calculate_fc3_set(
 
 def init_phono3py(
     atoms: Atoms,
-    symprec: float = 1e-5,
+    *,
+    fc2_supercell: np.ndarray,
+    fc3_supercell: np.ndarray,
+    q_point_mesh: tuple[int, int, int] = (20, 20, 20),
     displacement_distance: float = 0.03,
+    symprec: float = 1e-5,
     **kwargs: Any,
 ) -> Phono3py:
     """Initialize Phono3py object from ASE Atoms.
 
     Args:
         atoms (Atoms): ASE Atoms object to initialize from.
-        symprec (float): Symmetry precision for finding space group. Defaults to 1e-5.
+        fc2_supercell (np.ndarray): Supercell matrix for 2nd order force constants.
+        fc3_supercell (np.ndarray): Supercell matrix for 3rd order force constants.
+        q_point_mesh (tuple[int, int, int]): Mesh size for q-point sampling. Defaults
+            to (20, 20, 20).
         displacement_distance (float): Displacement distance for force calculations.
             Defaults to 0.03.
+        symprec (float): Symmetry precision for finding space group. Defaults to 1e-5.
         **kwargs (Any): Passed to Phono3py constructor.
 
     Returns:
@@ -127,21 +136,16 @@ def init_phono3py(
     Raises:
         ValueError: If required metadata is missing from atoms.info
     """
-    formula = atoms.get_chemical_formula(mode="metal")
-    for key in ("fc2_supercell", "fc3_supercell", "q_mesh"):
-        if key not in atoms.info:
-            raise ValueError(f"{formula} {key=} not found in {atoms.info=}")
-
     unit_cell = PhonopyAtoms(atoms.symbols, cell=atoms.cell, positions=atoms.positions)
     ph3 = Phono3py(
         unitcell=unit_cell,
-        supercell_matrix=atoms.info["fc3_supercell"],
-        phonon_supercell_matrix=atoms.info["fc2_supercell"],
+        supercell_matrix=fc3_supercell,
+        phonon_supercell_matrix=fc2_supercell,
         primitive_matrix="auto",
         symprec=symprec,
         **kwargs,
     )
-    ph3.mesh_numbers = atoms.info["q_mesh"]
+    ph3.mesh_numbers = q_point_mesh
 
     ph3.generate_displacements(distance=displacement_distance)
 
@@ -207,7 +211,7 @@ def load_force_sets(
 
 def calculate_conductivity(
     ph3: Phono3py,
-    temperatures: list[float],
+    temperatures: Sequence[float],
     boundary_mfp: float = 1e6,
     mode_kappa_thresh: float = 1e-6,
     **kwargs: Any,

diff --git a/models/chgnet/chgnet.yml b/models/chgnet/chgnet.yml
@@ -65,7 +65,10 @@ notes:
 
 metrics:
   phonons:
-    κ_SRME: 1.717
+    kappa_103:
+      κ_SRME: 1.717
+      pred_file: models/chgnet/chgnet-0.3.0/2024-11-09-kappa-103-FIRE-fmax=1e-4-symprec=1e-5.json.gz
+      pred_file_url: https://figshare.com/ndownloader/files/52134857
   geo_opt:
     pred_file: models/chgnet/chgnet-0.3.0/2023-12-21-wbm-geo-opt.json.gz
     pred_file_url: https://figshare.com/ndownloader/files/52061999

diff --git a/models/deepmd_dpa3/dpa3-v1-mptrj.yml → models/deepmd/dpa3-v1-mptrj.yml b/models/deepmd_dpa3/dpa3-v1-mptrj.yml → models/deepmd/dpa3-v1-mptrj.yml
@@ -88,20 +88,22 @@ notes:
     Designed as a large atomic model (LAM), DPA3 is tailored to integrate and simultaneously train on datasets from various disciplines, encompassing diverse chemical and materials systems across different research domains.
     Its model design ensures exceptional fitting accuracy and robust generalization both within and beyond the training domain.
     Furthermore, DPA3 maintains energy conservation and respects the physical symmetries of the potential energy surface, making it a dependable tool for a wide range of scientific applications.
+
 metrics:
   phonons:
-    pred_file: models/deepmd_dpa3/dpa3-v1-mptrj/2025-01-10-kappa-103.json.gz
-    pred_file_url: https://figshare.com/ndownloader/files/52057901
-    κ_SRME: 0.964
+    kappa_103:
+      κ_SRME: 0.964
+      pred_file: models/deepmd/dpa3-v1-mptrj/2025-01-10-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
+      pred_file_url: https://figshare.com/ndownloader/files/52134860
   geo_opt:
-    pred_file: models/deepmd_dpa3/dpa3-v1-mptrj/2025-01-10-wbm-geo-opt.json.gz
+    pred_file: models/deepmd/dpa3-v1-mptrj/2025-01-10-wbm-geo-opt.json.gz
     pred_col: dp_structure
-    pred_file_url:
+    pred_file_url: https://figshare.com/ndownloader/files/52134974
     symprec=1e-5:
-      analysis_file: models/deepmd_dpa3/dpa3-v1-mptrj/2025-01-10-wbm-geo-opt-symprec=1e-5.csv.gz
+      analysis_file: models/deepmd/dpa3-v1-mptrj/2025-01-10-wbm-geo-opt-symprec=1e-5.csv.gz
       analysis_file_url: https://figshare.com/ndownloader/files/52059431
   discovery:
-    pred_file: models/deepmd_dpa3/dpa3-v1-mptrj/2025-01-10-wbm-IS2RE.csv.gz
+    pred_file: models/deepmd/dpa3-v1-mptrj/2025-01-10-wbm-IS2RE.csv.gz
     pred_file_url: https://figshare.com/ndownloader/files/52057529
     pred_col: e_form_per_atom_dp
     full_test_set:

diff --git a/models/deepmd_dpa3/dpa3-v1-openlam.yml → models/deepmd/dpa3-v1-openlam.yml b/models/deepmd_dpa3/dpa3-v1-openlam.yml → models/deepmd/dpa3-v1-openlam.yml
@@ -100,20 +100,22 @@ notes:
     Designed as a large atomic model (LAM), DPA3 is tailored to integrate and simultaneously train on datasets from various disciplines, encompassing diverse chemical and materials systems across different research domains.
     Its model design ensures exceptional fitting accuracy and robust generalization both within and beyond the training domain.
     Furthermore, DPA3 maintains energy conservation and respects the physical symmetries of the potential energy surface, making it a dependable tool for a wide range of scientific applications.
+
 metrics:
   phonons:
-    pred_file: models/deepmd_dpa3/dpa3-v1-openlam/2025-01-10-kappa-103.json.gz
-    pred_file_url: https://figshare.com/ndownloader/files/52057904
-    κ_SRME: 0.741
+    kappa_103:
+      κ_SRME: 0.741
+      pred_file: models/deepmd/dpa3-v1-openlam/2025-01-10-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
+      pred_file_url: https://figshare.com/ndownloader/files/52134863
   geo_opt:
-    pred_file: models/deepmd_dpa3/dpa3-v1-openlam/2025-01-10-wbm-geo-opt.json.gz
+    pred_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-geo-opt.json.gz
     pred_col: dp_structure
-    pred_file_url:
+    pred_file_url: https://figshare.com/ndownloader/files/52135358
     symprec=1e-5:
-      analysis_file: models/deepmd_dpa3/dpa3-v1-openlam/2025-01-10-wbm-geo-opt-symprec=1e-5.csv.gz
+      analysis_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-geo-opt-symprec=1e-5.csv.gz
       analysis_file_url: https://figshare.com/ndownloader/files/52059434
   discovery:
-    pred_file: models/deepmd_dpa3/dpa3-v1-openlam/2025-01-10-wbm-IS2RE.csv.gz
+    pred_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-IS2RE.csv.gz
     pred_file_url: https://figshare.com/ndownloader/files/52057532
     pred_col: e_form_per_atom_dp
     full_test_set:

diff --git a/models/deepmd_dpa3/join_dpa3_preds.py → models/deepmd/join_dpa3_preds.py b/models/deepmd_dpa3/join_dpa3_preds.py → models/deepmd/join_dpa3_preds.py
diff --git a/models/deepmd_dpa3/readme.md → models/deepmd/readme.md b/models/deepmd_dpa3/readme.md → models/deepmd/readme.md
diff --git a/models/deepmd_dpa3/test_dpa3.py → models/deepmd/test_dpa3.py b/models/deepmd_dpa3/test_dpa3.py → models/deepmd/test_dpa3.py
diff --git a/models/eqV2/eqV2-m-omat-mp-salex.yml b/models/eqV2/eqV2-m-omat-mp-salex.yml
@@ -85,6 +85,11 @@ notes:
     Training was done by fine-tuning a model pretrained for 2 epochs on the OMat24 dataset.
 
 metrics:
+  phonons:
+    kappa_103:
+      κ_SRME: 1.717
+      pred_file: models/eqV2/eqV2-m-omat-mp-salex/2024-11-09-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
+      pred_file_url: https://figshare.com/ndownloader/files/52134893
   geo_opt:
     pred_file: models/eqV2/eqV2-m-omat-mp-salex/2024-10-18-wbm-geo-opt.json.gz
     pred_file_url: https://figshare.com/ndownloader/files/51607436

diff --git a/models/eqV2/eqV2-s-dens-mp.yml b/models/eqV2/eqV2-s-dens-mp.yml
@@ -88,7 +88,10 @@ notes:
 
 metrics:
   phonons:
-    κ_SRME: 1.665 # eqV2 S without denoising (no DeNS) achieves slightly worse κ_SRME=1.772
+    kappa_103:
+      κ_SRME: 1.665 # eqV2 S without denoising (no DeNS) achieves slightly worse κ_SRME=1.772
+      pred_file: TODO find this file or regenerate it
+      pred_file_url: https://figshare.com/TODO add this URL
   geo_opt:
     pred_file: models/eqV2/eqV2-s-dens-mp/2024-10-18-wbm-geo-opt.json.gz
     pred_file_url: https://figshare.com/ndownloader/files/52062392

diff --git a/models/gnome/gnome.yml b/models/gnome/gnome.yml
@@ -61,10 +61,8 @@ notes:
   Missing Preds: According to the authors, the 1734 missing WBM predictions are mostly due out-of-memory (OOM) errors. The model was evaluated on A100s but without neighbor lists. The plan is to backfill the missing predictions once H100s are available or neighbor list implementation goes live.
 
 metrics:
-  geo_opt:
-    pred_file: # not (yet) shared by authors
-    pred_file_url:
-    pred_col:
+  phonons: not available # model is closed source; the original GNoME submission predates the phonon tasks and DeepMind did not resubmit later
+  geo_opt: not available # authors declined to share model-relaxed structures, which can't be reproduced without model access
   discovery:
     pred_file: models/gnome/2023-11-01-gnome-50076332-wbm-IS2RE.csv.gz
     pred_file_url: https://figshare.com/ndownloader/files/52057556

diff --git a/models/grace/grace2l-r6.yml b/models/grace/grace2l-r6.yml
@@ -51,7 +51,10 @@ hyperparams:
 
 metrics:
   phonons:
-    κ_SRME: 0.525 # https://github.com/MPA2suite/k_SRME/pull/11/files
+    kappa_103:
+      κ_SRME: 0.525 # https://github.com/MPA2suite/k_SRME/pull/11/files
+      pred_file: models/grace/grace2l_r6/2024-11-20-kappa-103-FIRE-fmax=1e-4-symprec=1e-5.json.gz
+      pred_file_url: https://figshare.com/ndownloader/files/52134896
   geo_opt:
     pred_file: models/grace/grace2l_r6/2024-11-11-relaxed-structures.json.gz
     pred_file_url: https://figshare.com/ndownloader/files/52062590

diff --git a/models/m3gnet/m3gnet.yml b/models/m3gnet/m3gnet.yml
@@ -55,7 +55,10 @@ notes:
 
 metrics:
   phonons:
-    κ_SRME: 1.412
+    kappa_103:
+      κ_SRME: 1.412
+      pred_file: models/m3gnet/m3gnet-matgl-mp-2021-2-8-pes/2024-11-09-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
+      pred_file_url: https://figshare.com/ndownloader/files/52134866
   geo_opt:
     pred_file: models/m3gnet/m3gnet-tf-manual-sampling/2023-06-01-wbm-geo-opt.json.gz
     pred_file_url: https://figshare.com/ndownloader/files/52062011

diff --git a/models/m3gnet/test_m3gnet_discovery.py b/models/m3gnet/test_m3gnet_discovery.py
@@ -74,11 +74,13 @@
 if slurm_array_task_count > 1:
     df_in = np.array_split(df_in, slurm_array_task_count)[slurm_array_task_id - 1]
 
-checkpoint = None
 if model_type == "direct":
     checkpoint = f"{ROOT}/models/{model_name}/2023-05-26-DI-DFTstrictF10-TTRS-128U-442E"
-if model_type == "ms":
+elif model_type == "manual-sampling":
     checkpoint = f"{ROOT}/models/{model_name}/2023-05-26-MS-DFTstrictF10-128U-154E"
+else:
+    raise ValueError(f"{model_type=} not supported")
+
 relax_results: dict[str, dict[str, Any]] = {}
 m3gnet = Relaxer(potential=checkpoint)  # load pre-trained M3GNet model