Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Record kappa_103.pred_file paths and URLs in model YAML files #199

Merged
merged 11 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions matbench_discovery/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,8 +423,8 @@ class Model(Files, base_dir=f"{ROOT}/models"):
cgcnn_p = "cgcnn/cgcnn+p.yml"

# DeepMD-DPA3 models
dpa3_v1_mptrj = "deepmd_dpa3/dpa3-v1-mptrj.yml"
dpa3_v1_openlam = "deepmd_dpa3/dpa3-v1-openlam.yml"
dpa3_v1_mptrj = "deepmd/dpa3-v1-mptrj.yml"
dpa3_v1_openlam = "deepmd/dpa3-v1-openlam.yml"

# original M3GNet straight from publication, not re-trained
m3gnet = "m3gnet/m3gnet.yml"
Expand Down Expand Up @@ -453,7 +453,7 @@ class Model(Files, base_dir=f"{ROOT}/models"):
gnome = "gnome/gnome.yml"

# MatterSim
mattersim = "mattersim/mattersim-v1.yml"
mattersim_v1_5m = "mattersim/mattersim-v1-5m.yml"

# ORB
orb = "orb/orb.yml"
Expand Down Expand Up @@ -530,17 +530,17 @@ def geo_opt_path(self) -> str | None:
return f"{ROOT}/{rel_path}"

@property
def phonons_path(self) -> str | None:
def kappa_103_path(self) -> str | None:
"""File path associated with the file URL if it exists, otherwise
download the file first, then return the path.
"""
phonons_metrics = self.metrics.get("phonons", {})
if phonons_metrics in ("not available", "not applicable"):
return None
rel_path = phonons_metrics.get("pred_file")
rel_path = phonons_metrics.get("kappa_103", {}).get("pred_file")
if not rel_path:
raise ValueError(
f"metrics.phonons.pred_file not found in {self.rel_path!r}"
f"metrics.phonons.kappa_103.pred_file not found in {self.rel_path!r}"
)
return f"{ROOT}/{rel_path}"

Expand Down
7 changes: 2 additions & 5 deletions matbench_discovery/metrics/phonons.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ def calc_kappa_metrics_from_dfs(
- DFT_kappa_tot_avg: Reference DFT conductivity values
"""
# Remove precomputed columns
cols_to_remove = [Key.srd, Key.sre, Key.srme, MbdKey.true_kappa_tot_avg]
df_pred = df_pred.drop(columns=cols_to_remove, errors="ignore")

df_pred[MbdKey.kappa_tot_avg] = df_pred[MbdKey.kappa_tot_rta].map(
calculate_kappa_avg
)
Expand Down Expand Up @@ -102,7 +99,7 @@ def calculate_kappa_avg(kappa: np.ndarray) -> np.ndarray:
any NaN values or if the calculation fails. For multiple temperatures,
returns an array of averages.
"""
if np.any(np.isnan(kappa)):
if np.any(pd.isna(kappa)):
return np.array([np.nan])
try:
return np.asarray(kappa)[..., :3].mean(axis=-1)
Expand Down Expand Up @@ -222,7 +219,7 @@ def calc_kappa_srme(kappas_pred: pd.Series, kappas_true: pd.Series) -> np.ndarra
f"Neither mode_kappa_tot_avg, mode_kappa_tot nor individual kappa\n"
f"components found in {label}, got\n{keys}"
)
mode_kappa_tot_avgs[label] = kappas
mode_kappa_tot_avgs[label] = np.asarray(kappas)

# calculating microscopic error for all temperatures
microscopic_error = (
Expand Down
9 changes: 9 additions & 0 deletions matbench_discovery/phonons/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
"""This package contains phonon-related functionality."""

from typing import Final

import numpy as np
import pandas as pd

# q-point mesh (which phonon modes to sample) based on international space group number
spg_num_q_mesh_map: Final[dict[int, tuple[int, int, int]]] = {
225: (19, 19, 19), # rocksalt
186: (19, 19, 15), # wurtzite
216: (19, 19, 19), # zincblende
}


def check_imaginary_freqs(frequencies: np.ndarray, threshold: float = -0.01) -> bool:
"""Check if frequencies are imaginary.
Expand Down
26 changes: 15 additions & 11 deletions matbench_discovery/phonons/thermal_conductivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

import warnings
from collections.abc import Sequence
from copy import deepcopy
from typing import Any

Expand Down Expand Up @@ -108,17 +109,25 @@ def calculate_fc3_set(

def init_phono3py(
atoms: Atoms,
symprec: float = 1e-5,
*,
fc2_supercell: np.ndarray,
fc3_supercell: np.ndarray,
q_point_mesh: tuple[int, int, int] = (20, 20, 20),
displacement_distance: float = 0.03,
symprec: float = 1e-5,
**kwargs: Any,
) -> Phono3py:
"""Initialize Phono3py object from ASE Atoms.

Args:
atoms (Atoms): ASE Atoms object to initialize from.
symprec (float): Symmetry precision for finding space group. Defaults to 1e-5.
fc2_supercell (np.ndarray): Supercell matrix for 2nd order force constants.
fc3_supercell (np.ndarray): Supercell matrix for 3rd order force constants.
q_point_mesh (tuple[int, int, int]): Mesh size for q-point sampling. Defaults
to (20, 20, 20).
displacement_distance (float): Displacement distance for force calculations.
Defaults to 0.03.
symprec (float): Symmetry precision for finding space group. Defaults to 1e-5.
**kwargs (Any): Passed to Phono3py constructor.

Returns:
Expand All @@ -127,21 +136,16 @@ def init_phono3py(
Raises:
ValueError: If required metadata is missing from atoms.info
"""
formula = atoms.get_chemical_formula(mode="metal")
for key in ("fc2_supercell", "fc3_supercell", "q_mesh"):
if key not in atoms.info:
raise ValueError(f"{formula} {key=} not found in {atoms.info=}")

unit_cell = PhonopyAtoms(atoms.symbols, cell=atoms.cell, positions=atoms.positions)
ph3 = Phono3py(
unitcell=unit_cell,
supercell_matrix=atoms.info["fc3_supercell"],
phonon_supercell_matrix=atoms.info["fc2_supercell"],
supercell_matrix=fc3_supercell,
phonon_supercell_matrix=fc2_supercell,
primitive_matrix="auto",
symprec=symprec,
**kwargs,
)
ph3.mesh_numbers = atoms.info["q_mesh"]
ph3.mesh_numbers = q_point_mesh

ph3.generate_displacements(distance=displacement_distance)

Expand Down Expand Up @@ -207,7 +211,7 @@ def load_force_sets(

def calculate_conductivity(
ph3: Phono3py,
temperatures: list[float],
temperatures: Sequence[float],
boundary_mfp: float = 1e6,
mode_kappa_thresh: float = 1e-6,
**kwargs: Any,
Expand Down
5 changes: 4 additions & 1 deletion models/chgnet/chgnet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ notes:

metrics:
phonons:
κ_SRME: 1.717
kappa_103:
κ_SRME: 1.717
pred_file: models/chgnet/chgnet-0.3.0/2024-11-09-kappa-103-FIRE-fmax=1e-4-symprec=1e-5.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52134857
geo_opt:
pred_file: models/chgnet/chgnet-0.3.0/2023-12-21-wbm-geo-opt.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52061999
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,22 @@ notes:
Designed as a large atomic model (LAM), DPA3 is tailored to integrate and simultaneously train on datasets from various disciplines, encompassing diverse chemical and materials systems across different research domains.
Its model design ensures exceptional fitting accuracy and robust generalization both within and beyond the training domain.
Furthermore, DPA3 maintains energy conservation and respects the physical symmetries of the potential energy surface, making it a dependable tool for a wide range of scientific applications.

metrics:
phonons:
pred_file: models/deepmd_dpa3/dpa3-v1-mptrj/2025-01-10-kappa-103.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52057901
κ_SRME: 0.964
kappa_103:
κ_SRME: 0.964
pred_file: models/deepmd/dpa3-v1-mptrj/2025-01-10-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52134860
geo_opt:
pred_file: models/deepmd_dpa3/dpa3-v1-mptrj/2025-01-10-wbm-geo-opt.json.gz
pred_file: models/deepmd/dpa3-v1-mptrj/2025-01-10-wbm-geo-opt.json.gz
pred_col: dp_structure
pred_file_url:
pred_file_url: https://figshare.com/ndownloader/files/52134974
symprec=1e-5:
analysis_file: models/deepmd_dpa3/dpa3-v1-mptrj/2025-01-10-wbm-geo-opt-symprec=1e-5.csv.gz
analysis_file: models/deepmd/dpa3-v1-mptrj/2025-01-10-wbm-geo-opt-symprec=1e-5.csv.gz
analysis_file_url: https://figshare.com/ndownloader/files/52059431
discovery:
pred_file: models/deepmd_dpa3/dpa3-v1-mptrj/2025-01-10-wbm-IS2RE.csv.gz
pred_file: models/deepmd/dpa3-v1-mptrj/2025-01-10-wbm-IS2RE.csv.gz
pred_file_url: https://figshare.com/ndownloader/files/52057529
pred_col: e_form_per_atom_dp
full_test_set:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,20 +100,22 @@ notes:
Designed as a large atomic model (LAM), DPA3 is tailored to integrate and simultaneously train on datasets from various disciplines, encompassing diverse chemical and materials systems across different research domains.
Its model design ensures exceptional fitting accuracy and robust generalization both within and beyond the training domain.
Furthermore, DPA3 maintains energy conservation and respects the physical symmetries of the potential energy surface, making it a dependable tool for a wide range of scientific applications.

metrics:
phonons:
pred_file: models/deepmd_dpa3/dpa3-v1-openlam/2025-01-10-kappa-103.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52057904
κ_SRME: 0.741
kappa_103:
κ_SRME: 0.741
pred_file: models/deepmd/dpa3-v1-openlam/2025-01-10-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52134863
geo_opt:
pred_file: models/deepmd_dpa3/dpa3-v1-openlam/2025-01-10-wbm-geo-opt.json.gz
pred_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-geo-opt.json.gz
pred_col: dp_structure
pred_file_url:
pred_file_url: https://figshare.com/ndownloader/files/52135358
symprec=1e-5:
analysis_file: models/deepmd_dpa3/dpa3-v1-openlam/2025-01-10-wbm-geo-opt-symprec=1e-5.csv.gz
analysis_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-geo-opt-symprec=1e-5.csv.gz
analysis_file_url: https://figshare.com/ndownloader/files/52059434
discovery:
pred_file: models/deepmd_dpa3/dpa3-v1-openlam/2025-01-10-wbm-IS2RE.csv.gz
pred_file: models/deepmd/dpa3-v1-openlam/2025-01-10-wbm-IS2RE.csv.gz
pred_file_url: https://figshare.com/ndownloader/files/52057532
pred_col: e_form_per_atom_dp
full_test_set:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
5 changes: 5 additions & 0 deletions models/eqV2/eqV2-m-omat-mp-salex.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ notes:
Training was done by fine-tuning a model pretrained for 2 epochs on the OMat24 dataset.

metrics:
phonons:
kappa_103:
κ_SRME: 1.717
pred_file: models/eqV2/eqV2-m-omat-mp-salex/2024-11-09-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52134893
geo_opt:
pred_file: models/eqV2/eqV2-m-omat-mp-salex/2024-10-18-wbm-geo-opt.json.gz
pred_file_url: https://figshare.com/ndownloader/files/51607436
Expand Down
5 changes: 4 additions & 1 deletion models/eqV2/eqV2-s-dens-mp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ notes:

metrics:
phonons:
κ_SRME: 1.665 # eqV2 S without denoising (no DeNS) achieves slightly worse κ_SRME=1.772
kappa_103:
κ_SRME: 1.665 # eqV2 S without denoising (no DeNS) achieves slightly worse κ_SRME=1.772
pred_file: TODO find this file or regenerate it
pred_file_url: https://figshare.com/TODO add this URL
geo_opt:
pred_file: models/eqV2/eqV2-s-dens-mp/2024-10-18-wbm-geo-opt.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52062392
Expand Down
6 changes: 2 additions & 4 deletions models/gnome/gnome.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,8 @@ notes:
Missing Preds: According to the authors, the 1734 missing WBM predictions are mostly due to out-of-memory (OOM) errors. The model was evaluated on A100s but without neighbor lists. The plan is to backfill the missing predictions once H100s are available or a neighbor list implementation goes live.

metrics:
geo_opt:
pred_file: # not (yet) shared by authors
pred_file_url:
pred_col:
phonons: not available # model is closed source; the original GNoME submission predates the phonon tasks and DeepMind did not resubmit later
geo_opt: not available # authors declined to share model-relaxed structures, which can't be reproduced without model access
discovery:
pred_file: models/gnome/2023-11-01-gnome-50076332-wbm-IS2RE.csv.gz
pred_file_url: https://figshare.com/ndownloader/files/52057556
Expand Down
5 changes: 4 additions & 1 deletion models/grace/grace2l-r6.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ hyperparams:

metrics:
phonons:
κ_SRME: 0.525 # https://github.com/MPA2suite/k_SRME/pull/11/files
kappa_103:
κ_SRME: 0.525 # https://github.com/MPA2suite/k_SRME/pull/11/files
pred_file: models/grace/grace2l_r6/2024-11-20-kappa-103-FIRE-fmax=1e-4-symprec=1e-5.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52134896
geo_opt:
pred_file: models/grace/grace2l_r6/2024-11-11-relaxed-structures.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52062590
Expand Down
5 changes: 4 additions & 1 deletion models/m3gnet/m3gnet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ notes:

metrics:
phonons:
κ_SRME: 1.412
kappa_103:
κ_SRME: 1.412
pred_file: models/m3gnet/m3gnet-matgl-mp-2021-2-8-pes/2024-11-09-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52134866
geo_opt:
pred_file: models/m3gnet/m3gnet-tf-manual-sampling/2023-06-01-wbm-geo-opt.json.gz
pred_file_url: https://figshare.com/ndownloader/files/52062011
Expand Down
6 changes: 4 additions & 2 deletions models/m3gnet/test_m3gnet_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,13 @@
if slurm_array_task_count > 1:
df_in = np.array_split(df_in, slurm_array_task_count)[slurm_array_task_id - 1]

checkpoint = None
if model_type == "direct":
checkpoint = f"{ROOT}/models/{model_name}/2023-05-26-DI-DFTstrictF10-TTRS-128U-442E"
if model_type == "ms":
elif model_type == "manual-sampling":
checkpoint = f"{ROOT}/models/{model_name}/2023-05-26-MS-DFTstrictF10-128U-154E"
else:
raise ValueError(f"{model_type=} not supported")

relax_results: dict[str, dict[str, Any]] = {}
m3gnet = Relaxer(potential=checkpoint) # load pre-trained M3GNet model

Expand Down
Loading
Loading