From 1de4083e12f5ad5da0810c2ceea7e548c66db037 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Sat, 3 Feb 2024 23:35:37 -0800 Subject: [PATCH 01/44] Remove the implicit dependency to mpi4py in __init__.py (issue #1888) (#2059) * Remove the implicit dependency to mpi4py in __init__.py (issue #1888) * import error * Import error in momgfccsd tests --- pyscf/cc/__init__.py | 3 --- pyscf/cc/test/test_momgfccsd.py | 23 ++++++++++++----------- pyscf/pbc/__all__.py | 3 +++ pyscf/post_scf.py | 3 +++ 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pyscf/cc/__init__.py b/pyscf/cc/__init__.py index 6a037a27fe..6b4dc5d34e 100644 --- a/pyscf/cc/__init__.py +++ b/pyscf/cc/__init__.py @@ -75,7 +75,6 @@ from pyscf.cc import eom_uccsd from pyscf.cc import eom_gccsd from pyscf.cc import qcisd -from pyscf.cc import momgfccsd from pyscf import scf def CCSD(mf, frozen=None, mo_coeff=None, mo_occ=None): @@ -223,5 +222,3 @@ def _finalize(self): return self mycc._finalize = _finalize.__get__(mycc, mycc.__class__) return mycc - -MomGFCCSD = momgfccsd.MomGFCCSD diff --git a/pyscf/cc/test/test_momgfccsd.py b/pyscf/cc/test/test_momgfccsd.py index ef21f1e312..51d73a47a5 100644 --- a/pyscf/cc/test/test_momgfccsd.py +++ b/pyscf/cc/test/test_momgfccsd.py @@ -2,6 +2,7 @@ import unittest import numpy as np from pyscf import gto, scf, cc, lib +from pyscf.cc import momgfccsd class KnownValues(unittest.TestCase): @@ -22,7 +23,7 @@ def setUpClass(cls): cls.mycc.kernel() cls.mycc.solve_lambda() - gfcc = cc.momgfccsd.MomGFCCSD(cls.mycc, niter=(5, 5)) + gfcc = momgfccsd.MomGFCCSD(cls.mycc, niter=(5, 5)) imds = gfcc.make_imds() cls.hole_moments = gfcc.build_hole_moments(imds=imds) cls.part_moments = gfcc.build_part_moments(imds=imds) @@ -54,7 +55,7 @@ def tearDownClass(cls): def test_lambda_assertion(self): with lib.temporary_env(self.mycc, l1=None, l2=None): - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(0, 0)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(0, 0)) 
self.assertRaises(ValueError, gfcc.kernel) def _test_moments(self, e, v, nmax, ref): @@ -64,7 +65,7 @@ def _test_moments(self, e, v, nmax, ref): self.assertAlmostEqual(np.max(np.abs(m1-m2)), 0.0, 7) def _test_niter(self, niter): - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) eh, vh, ep, vp = gfcc.kernel() self.assertAlmostEqual(gfcc.ipgfccsd(nroots=1)[0], self.ips[niter]) self.assertAlmostEqual(gfcc.eagfccsd(nroots=1)[0], self.eas[niter]) @@ -96,7 +97,7 @@ def test_amp_input(self): imds.make_ea() t1, t2, l1, l2 = self.mycc.t1, self.mycc.t2, self.mycc.l1, self.mycc.l2 with lib.temporary_env(self.mycc, t1=None, t2=None, l1=None, l2=None): - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) eh, vh, ep, vp = gfcc.kernel(t1=t1, t2=t2, l1=l1, l2=l2, imds=imds) self.assertAlmostEqual(gfcc.ipgfccsd(nroots=1)[0], self.ips[niter]) self.assertAlmostEqual(gfcc.eagfccsd(nroots=1)[0], self.eas[niter]) @@ -105,7 +106,7 @@ def test_amp_input(self): def test_mom_input(self): niter = 2 - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) hole_moments = self.hole_moments[:2*niter+2] part_moments = self.part_moments[:2*niter+2] eh, vh, ep, vp = gfcc.kernel(hole_moments=hole_moments, part_moments=part_moments) @@ -116,7 +117,7 @@ def test_mom_input(self): def test_hermi_moments(self): niter = 2 - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) gfcc.hermi_moments = True hole_moments = self.hole_moments[:2*niter+2] part_moments = self.part_moments[:2*niter+2] @@ -128,7 +129,7 @@ def test_hermi_moments(self): def test_hermi_moments(self): niter = 2 - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) 
gfcc.hermi_moments = True gfcc.hermi_solver = True hole_moments = self.hole_moments[:2*niter+2] @@ -141,7 +142,7 @@ def test_hermi_moments(self): def test_misc(self): niter = 2 - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) gfcc.reset() eh, vh, ep, vp = gfcc.kernel() self.assertAlmostEqual(gfcc.ipgfccsd(nroots=1)[0], self.ips[niter]) @@ -157,14 +158,14 @@ def test_misc(self): def test_chkfile(self): niter = 1 - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) eh, vh, ep, vp = gfcc.kernel() self.assertAlmostEqual(gfcc.ipgfccsd(nroots=1)[0], self.ips[niter]) self.assertAlmostEqual(gfcc.eagfccsd(nroots=1)[0], self.eas[niter]) self._test_moments(eh, vh, 2*niter+1, self.hole_moments) self._test_moments(ep, vp, 2*niter+1, self.part_moments) gfcc.dump_chk(chkfile="tmp.chk") - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) gfcc.update("tmp.chk") self.assertAlmostEqual(gfcc.ipgfccsd(nroots=1)[0], self.ips[niter]) self.assertAlmostEqual(gfcc.eagfccsd(nroots=1)[0], self.eas[niter]) @@ -184,7 +185,7 @@ def test_density_fitting(self): mycc.solve_lambda() niter = 3 - gfcc = cc.momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) + gfcc = momgfccsd.MomGFCCSD(self.mycc, niter=(niter, niter)) eh, vh, ep, vp = gfcc.kernel() self.assertAlmostEqual(gfcc.ipgfccsd(nroots=1)[0], self.ips[niter]) self.assertAlmostEqual(gfcc.eagfccsd(nroots=1)[0], self.eas[niter]) diff --git a/pyscf/pbc/__all__.py b/pyscf/pbc/__all__.py index f542b5fb09..51ad5899e9 100644 --- a/pyscf/pbc/__all__.py +++ b/pyscf/pbc/__all__.py @@ -13,3 +13,6 @@ from . import dft except (ImportError, IOError): pass + +# Note the mpicc module implicitly import mpi4py. This module should not be +# automatically imported until the dependency to mpi4py is completely removed. 
diff --git a/pyscf/post_scf.py b/pyscf/post_scf.py index aba028fe92..821362ffc8 100644 --- a/pyscf/post_scf.py +++ b/pyscf/post_scf.py @@ -3,3 +3,6 @@ from . import doci except ImportError: pass + +# Note the agf2 module implicitly import mpi4py. This module should not be +# automatically imported until the dependency to mpi4py is completely removed. From 7185c5c3b7c3e64a29a2134d29442cb5d707c23d Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Sat, 3 Feb 2024 15:17:04 -0800 Subject: [PATCH 02/44] Fix df-grad for UHF when symmetry is enabled (issue #2054) --- pyscf/df/grad/rhf.py | 6 +++--- pyscf/df/test/test_df_grad.py | 1 + pyscf/scf/uhf_symm.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pyscf/df/grad/rhf.py b/pyscf/df/grad/rhf.py index 7c2db50f0d..0c945d8d21 100644 --- a/pyscf/df/grad/rhf.py +++ b/pyscf/df/grad/rhf.py @@ -342,6 +342,9 @@ def _decompose_rdm1 (mf_grad, mol, dm): if hasattr (dm, 'mo_coeff') and hasattr (dm, 'mo_occ'): mo_coeff = dm.mo_coeff mo_occ = dm.mo_occ + if getattr(mo_occ, 'ndim', None) == 1: # RHF orbitals + mo_coeff = [mo_coeff] + mo_occ = [mo_occ] else: s0 = mol.intor ('int1e_ovlp') mo_occ = [] @@ -352,10 +355,7 @@ def _decompose_rdm1 (mf_grad, mol, dm): mo_occ.append (n) mo_coeff.append (c) mo_occ = numpy.stack (mo_occ, axis=0) - nmo = mo_occ.shape[-1] - mo_coeff = numpy.asarray(mo_coeff).reshape(-1,nao,nmo) - mo_occ = numpy.asarray(mo_occ).reshape(-1,nmo) orbor = [] orbol = [] for i in range(nset): diff --git a/pyscf/df/test/test_df_grad.py b/pyscf/df/test/test_df_grad.py index 710af63fd6..b63cf284a2 100644 --- a/pyscf/df/test/test_df_grad.py +++ b/pyscf/df/test/test_df_grad.py @@ -90,6 +90,7 @@ def test_uhf_grad(self): ['O' , (0. , 0. , 0.)], [1 , (0. , -0.757 , 0.587)], [1 , (0. 
, 0.757 , 0.587)] ] + mol.symmetry = True mol.verbose = 0 mol.basis = '631g' mol.spin = 2 diff --git a/pyscf/scf/uhf_symm.py b/pyscf/scf/uhf_symm.py index d289049405..1ea38b0956 100644 --- a/pyscf/scf/uhf_symm.py +++ b/pyscf/scf/uhf_symm.py @@ -530,7 +530,7 @@ def _finalize(self): mo_b = lib.tag_array(self.mo_coeff[1][:,idxb], orbsym=orbsymb, degen_mapping=degen_b) self.mo_coeff = (mo_a, mo_b) - self.mo_occ = (self.mo_occ[0][idxa], self.mo_occ[1][idxb]) + self.mo_occ = numpy.asarray([self.mo_occ[0][idxa], self.mo_occ[1][idxb]]) if self.chkfile: chkfile.dump_scf(self.mol, self.chkfile, self.e_tot, self.mo_energy, self.mo_coeff, self.mo_occ, overwrite_mol=False) From 68952e57a48fc236169873c535b2580c7e9a6875 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Thu, 1 Feb 2024 14:50:23 -0800 Subject: [PATCH 03/44] Check cylindrical symmetry of the orbitals for FCI solver (issue #2022) --- pyscf/fci/direct_spin1_cyl_sym.py | 10 +++++++--- pyscf/fci/direct_spin1_symm.py | 12 ++++++++++++ pyscf/fci/test/test_spin1_cyl_sym.py | 9 +++++++++ pyscf/fci/test/test_spin1_symm.py | 13 ++++++++++++- 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/pyscf/fci/direct_spin1_cyl_sym.py b/pyscf/fci/direct_spin1_cyl_sym.py index 0056bbd992..80aa27192f 100644 --- a/pyscf/fci/direct_spin1_cyl_sym.py +++ b/pyscf/fci/direct_spin1_cyl_sym.py @@ -42,9 +42,9 @@ from pyscf.fci import cistring from pyscf.fci import direct_spin1 from pyscf.fci import direct_spin1_symm -from pyscf.fci.direct_spin1_symm import (_sv_associated_det, - _strs_angular_momentum, - _cyl_sym_orbital_rotation) +from pyscf.fci.direct_spin1_symm import ( + _sv_associated_det, _strs_angular_momentum, _cyl_sym_orbital_rotation, + _validate_degen_mapping) from pyscf.fci import direct_nosym from pyscf.fci import addons from pyscf import __config__ @@ -558,6 +558,10 @@ def kernel(self, h1e, eri, norb, nelec, ci0=None, if not hasattr(orbsym, 'degen_mapping'): degen_mapping = map_degeneracy(h1e.diagonal(), orbsym) orbsym = 
lib.tag_array(orbsym, degen_mapping=degen_mapping) + if not _validate_degen_mapping(orbsym.degen_mapping, norb): + raise lib.exceptions.PointGroupSymmetryError( + 'Incomplete 2D-irrep orbitals for cylindrical symmetry.\n' + f'orbsym = {orbsym}.') u = _cyl_sym_orbital_rotation(orbsym, orbsym.degen_mapping) h1e = u.dot(h1e).dot(u.conj().T) diff --git a/pyscf/fci/direct_spin1_symm.py b/pyscf/fci/direct_spin1_symm.py index 272a432ec6..017505836a 100644 --- a/pyscf/fci/direct_spin1_symm.py +++ b/pyscf/fci/direct_spin1_symm.py @@ -257,6 +257,13 @@ def get_init_guess(norb, nelec, nroots, hdiag, orbsym, wfnsym=0): raise RuntimeError(f'Initial guess for symmetry {wfnsym} not found') return ci0 +def _validate_degen_mapping(mapping, norb): + '''Check if 2D irreps are properly paired''' + mapping = np.asarray(mapping) + return (mapping.max() < norb and + # Must be self-conjugated + numpy.array_equal(mapping[mapping], numpy.arange(norb))) + def get_init_guess_cyl_sym(norb, nelec, nroots, hdiag, orbsym, wfnsym=0): neleca, nelecb = _unpack_nelec(nelec) strsa = strsb = cistring.gen_strings4orblist(range(norb), neleca) @@ -751,6 +758,11 @@ def kernel(self, h1e, eri, norb, nelec, ci0=None, orbsym = lib.tag_array(orbsym, degen_mapping=degen_mapping) if davidson_only is None: davidson_only = True + if not _validate_degen_mapping(orbsym.degen_mapping, norb): + raise lib.exceptions.PointGroupSymmetryError( + 'Incomplete 2D-irrep orbitals for cylindrical symmetry.\n' + f'orbsym = {orbsym}. 
' + f'Retry {self.__class__} with D2h subgroup symmetry.') wfnsym_ir = self.guess_wfnsym(norb, nelec, ci0, orbsym, wfnsym, **kwargs) self.sym_allowed_idx = sym_allowed_indices(nelec, orbsym, wfnsym_ir) diff --git a/pyscf/fci/test/test_spin1_cyl_sym.py b/pyscf/fci/test/test_spin1_cyl_sym.py index 84e014d690..0648528e31 100644 --- a/pyscf/fci/test/test_spin1_cyl_sym.py +++ b/pyscf/fci/test/test_spin1_cyl_sym.py @@ -212,6 +212,15 @@ def test_linearmole_a2(self): mc.run() self.assertAlmostEqual(mc.e_tot, 2.8999951068356475, 8) + def test_incomplete_orbsym(self): + sol = direct_spin1_cyl_sym.FCI(gto.Mole()) + no, ne = 2, 2 + h1 = np.ones((no,no)) + h2 = np.ones((no,no,no,no)) + orbsym = lib.tag_array(np.array([0,3]), degen_mapping=[0,2]) + with self.assertRaises(lib.exceptions.PointGroupSymmetryError): + sol.kernel(h1, h2, no, ne, orbsym=orbsym) + if __name__ == "__main__": print("Full Tests for spin1-symm") unittest.main() diff --git a/pyscf/fci/test/test_spin1_symm.py b/pyscf/fci/test/test_spin1_symm.py index 462009e3dd..4b63d48d27 100644 --- a/pyscf/fci/test/test_spin1_symm.py +++ b/pyscf/fci/test/test_spin1_symm.py @@ -15,7 +15,7 @@ import unittest import numpy -from pyscf import gto +from pyscf import gto, lib from pyscf import scf from pyscf import ao2mo from pyscf import fci @@ -197,6 +197,17 @@ def test_linearmole(self): ci1 = fci.addons.transform_ci(ci_y, (3,3), u.T) self.assertAlmostEqual(abs(ci1.ravel().dot(ci_y.ravel())), 1, 9) + def test_incomplete_orbsym(self): + mol = gto.Mole() + mol.groupname = 'Dooh' + sol = direct_spin1_symm.FCI(mol) + no, ne = 2, 2 + h1 = numpy.ones((no,no)) + h2 = numpy.ones((no,no,no,no)) + orbsym = lib.tag_array(numpy.array([0,3]), degen_mapping=[0,2]) + with self.assertRaises(lib.exceptions.PointGroupSymmetryError): + sol.kernel(h1, h2, no, ne, orbsym=orbsym) + if __name__ == "__main__": print("Full Tests for spin1-symm") unittest.main() From 751103d903857e4792be2e995bb49bbe00d8a00e Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: 
Sat, 3 Feb 2024 14:39:29 -0800 Subject: [PATCH 04/44] Fix bug for init_guess=atom (issue #2056) Fix dft get_veff tests --- pyscf/dft/test/test_h2o.py | 11 ++++++----- pyscf/scf/hf.py | 2 +- pyscf/scf/test/test_h2o.py | 6 ++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/pyscf/dft/test/test_h2o.py b/pyscf/dft/test/test_h2o.py index 7a147c9bae..7eee60fe8c 100644 --- a/pyscf/dft/test/test_h2o.py +++ b/pyscf/dft/test/test_h2o.py @@ -91,6 +91,7 @@ def tearDownModule(): class KnownValues(unittest.TestCase): def test_nr_lda(self): method = dft.RKS(h2o) + method.init_guess = 'atom' # initial guess problem, issue #2056 method.grids.prune = dft.gen_grid.treutler_prune method.grids.atom_grid = {"H": (50, 194), "O": (50, 194),} method.xc = 'lda, vwn_rpa' @@ -372,7 +373,7 @@ def test_nr_rks_nlc(self): self.assertAlmostEqual(lib.fp(vxc), 22.767792068559917, 8) method.xc = 'B97M_V' - vxc = method.get_veff(h2o, dm) + vxc = method.get_veff(h2o, dm, dm, vxc) self.assertAlmostEqual(lib.fp(vxc), 23.067046560473408, 8) def test_nr_rks_nlc_small_memory_high_cost(self): @@ -381,7 +382,7 @@ def test_nr_rks_nlc_small_memory_high_cost(self): method._eri = None method.max_memory = 0 method.xc = 'wB97M_V' - vxc = method.get_veff(h2o, dm, dm, vxc) + vxc = method.get_veff(h2o, dm) self.assertAlmostEqual(lib.fp(vxc), 22.767792068559917, 8) method._eri = None @@ -419,7 +420,7 @@ def test_nr_uks_nlc_high_cost(self): self.assertAlmostEqual(lib.fp(vxc[1]), 22.767792068559917, 8) method.xc = 'B97M_V' - vxc = method.get_veff(h2o, dm) + vxc = method.get_veff(h2o, dm, dm, vxc) self.assertAlmostEqual(lib.fp(vxc[0]), 23.067046560473408, 8) self.assertAlmostEqual(lib.fp(vxc[1]), 23.067046560473408, 8) @@ -430,7 +431,7 @@ def test_nr_uks_nlc_small_memory_high_cost(self): method._eri = None method.max_memory = 0 method.xc = 'wB97M_V' - vxc = method.get_veff(h2o, dm, dm, vxc) + vxc = method.get_veff(h2o, dm) self.assertAlmostEqual(lib.fp(vxc[0]), 22.767792068559917, 8) 
self.assertAlmostEqual(lib.fp(vxc[1]), 22.767792068559917, 8) @@ -471,7 +472,7 @@ def test_nr_gks_nlc_small_memory_high_cost(self): method._eri = None method.max_memory = 0 method.xc = 'wB97M_V' - vxc = method.get_veff(h2o, dm, dm, vxc) + vxc = method.get_veff(h2o, dm) self.assertAlmostEqual(lib.fp(vxc), 3.172920887028461+0j, 8) method._eri = None diff --git a/pyscf/scf/hf.py b/pyscf/scf/hf.py index b71de95b03..9f559265b5 100644 --- a/pyscf/scf/hf.py +++ b/pyscf/scf/hf.py @@ -506,7 +506,7 @@ def init_guess_by_atom(mol): dm = scipy.linalg.block_diag(*atm_dms) mo_coeff = scipy.linalg.block_diag(*mo_coeff) - mo_occ = numpy.hstack(occ) + mo_occ = numpy.hstack(mo_occ) if mol.cart: cart2sph = mol.cart2sph_coeff(normalized='sp') diff --git a/pyscf/scf/test/test_h2o.py b/pyscf/scf/test/test_h2o.py index 042d5bf3ad..706cb5df6a 100644 --- a/pyscf/scf/test/test_h2o.py +++ b/pyscf/scf/test/test_h2o.py @@ -191,6 +191,8 @@ def test_nr_uhf_symm(self): def test_init_guess_minao(self): dm = scf.hf.init_guess_by_minao(mol) + self.assertEqual(dm.mo_coeff.shape[0], mol.nao) + self.assertEqual(dm.mo_occ.size, dm.mo_coeff.shape[1]) s = scf.hf.get_ovlp(mol) occ, mo = scipy.linalg.eigh(dm, s, type=2) ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) @@ -213,6 +215,8 @@ def test_init_guess_minao(self): def test_init_guess_atom(self): dm = scf.hf.init_guess_by_atom(mol) + self.assertEqual(dm.mo_coeff.shape[0], mol.nao) + self.assertEqual(dm.mo_occ.size, dm.mo_coeff.shape[1]) s = scf.hf.get_ovlp(mol) occ, mo = scipy.linalg.eigh(dm, s, type=2) ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) @@ -240,6 +244,8 @@ def test_init_guess_atom(self): def test_init_guess_1e(self): dm = scf.hf.init_guess_by_1e(mol) + self.assertEqual(dm.mo_coeff.shape[0], mol.nao) + self.assertEqual(dm.mo_occ.size, dm.mo_coeff.shape[1]) s = scf.hf.get_ovlp(mol) occ, mo = scipy.linalg.eigh(dm, s, type=2) ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) From f32c4c0dbd0932df48e3b70bb7e19235066bfbfa 
Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Sat, 3 Feb 2024 14:07:39 -0800 Subject: [PATCH 05/44] Improve magmom initialization (issue #2055) --- pyscf/gto/mole.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pyscf/gto/mole.py b/pyscf/gto/mole.py index 76f5e8b95b..ab1e32c61c 100644 --- a/pyscf/gto/mole.py +++ b/pyscf/gto/mole.py @@ -1209,7 +1209,8 @@ def copy(mol, deep=True): newmol._ecp = copy.deepcopy(mol._ecp) newmol.pseudo = copy.deepcopy(mol.pseudo) newmol._pseudo = copy.deepcopy(mol._pseudo) - newmol.magmom = list(mol.magmom) + if mol.magmom: + newmol.magmom = list(mol.magmom) return newmol def pack(mol): @@ -2577,16 +2578,16 @@ def build(self, dump_input=True, parse_arg=ARGPARSE, # number of electrons are consistent. self.nelec - if self.magmom is None: + if not self.magmom: self.magmom = [0,] * self.natm elif len(self.magmom) != self.natm: logger.warn(self, 'len(magmom) != natm. Set magmom to zero') self.magmom = [0,] * self.natm + elif isinstance(self.magmom, np.ndarray): + self.magmom = self.magmom.tolist() if self.spin == 0 and abs(numpy.sum(self.magmom) - self.spin) > 1e-6: #don't check for unrestricted calcs. 
raise ValueError("mol.magmom is set incorrectly.") - if isinstance(self.magmom, np.ndarray): - self.magmom = self.magmom.tolist() if self.symmetry: self._build_symmetry() From 5592023258158e6f98c6f5148b5137fb998dab9a Mon Sep 17 00:00:00 2001 From: Hongzhou Ye Date: Sun, 4 Feb 2024 02:51:07 -0500 Subject: [PATCH 06/44] DIIS with damping (#2053) * update simple damping and add DIIS damping * update get_fock in solvent and dynamic level shift --------- Co-authored-by: hongzhouye <> --- pyscf/pbc/scf/khf.py | 10 +++++----- pyscf/pbc/scf/krohf.py | 7 ++++--- pyscf/pbc/scf/kuhf.py | 13 +++++++------ pyscf/pbc/scf/test/test_khf.py | 23 +++++++---------------- pyscf/scf/addons.py | 5 +++-- pyscf/scf/diis.py | 11 ++++++++--- pyscf/scf/hf.py | 32 ++++++++++++++++++-------------- pyscf/scf/rohf.py | 7 ++++--- pyscf/scf/test/test_rhf.py | 13 ++++++++----- pyscf/scf/test/test_uhf.py | 16 ++++++++-------- pyscf/scf/uhf.py | 11 ++++++----- pyscf/solvent/_attach_solvent.py | 5 +++-- 12 files changed, 81 insertions(+), 72 deletions(-) diff --git a/pyscf/pbc/scf/khf.py b/pyscf/pbc/scf/khf.py index 3c03e36661..1ef2d88908 100644 --- a/pyscf/pbc/scf/khf.py +++ b/pyscf/pbc/scf/khf.py @@ -125,7 +125,8 @@ def get_jk(mf, cell, dm_kpts, kpts, kpts_band=None, with_j=True, with_k=True, omega, exxdiv=mf.exxdiv) def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, - diis_start_cycle=None, level_shift_factor=None, damp_factor=None): + diis_start_cycle=None, level_shift_factor=None, damp_factor=None, + fock_last=None): h1e_kpts, s_kpts, vhf_kpts, dm_kpts = h1e, s1e, vhf, dm if h1e_kpts is None: h1e_kpts = mf.get_hcore() if vhf_kpts is None: vhf_kpts = mf.get_veff(mf.cell, dm_kpts) @@ -142,11 +143,10 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, if s_kpts is None: s_kpts = mf.get_ovlp() if dm_kpts is None: dm_kpts = mf.make_rdm1() - if 0 <= cycle < diis_start_cycle-1 and abs(damp_factor) > 1e-4: - f_kpts = [mol_hf.damping(s1e, dm_kpts[k] * 
0.5, f_kpts[k], damp_factor) - for k, s1e in enumerate(s_kpts)] + if 0 <= cycle < diis_start_cycle-1 and abs(damp_factor) > 1e-4 and fock_last is not None: + f_kpts = [mol_hf.damping(f, f_prev, damp_factor) for f,f_prev in zip(f_kpts,fock_last)] if diis and cycle >= diis_start_cycle: - f_kpts = diis.update(s_kpts, dm_kpts, f_kpts, mf, h1e_kpts, vhf_kpts) + f_kpts = diis.update(s_kpts, dm_kpts, f_kpts, mf, h1e_kpts, vhf_kpts, f_prev=fock_last) if abs(level_shift_factor) > 1e-4: f_kpts = [mol_hf.level_shift(s, dm_kpts[k], f_kpts[k], level_shift_factor) for k, s in enumerate(s_kpts)] diff --git a/pyscf/pbc/scf/krohf.py b/pyscf/pbc/scf/krohf.py index fb9b8d7161..6a23588fb7 100644 --- a/pyscf/pbc/scf/krohf.py +++ b/pyscf/pbc/scf/krohf.py @@ -51,7 +51,8 @@ def make_rdm1(mo_coeff_kpts, mo_occ_kpts, **kwargs): return lib.tag_array((dma, dmb), mo_coeff=mo_coeff_kpts, mo_occ=mo_occ_kpts) def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, - diis_start_cycle=None, level_shift_factor=None, damp_factor=None): + diis_start_cycle=None, level_shift_factor=None, damp_factor=None, + fock_last=None): h1e_kpts, s_kpts, vhf_kpts, dm_kpts = h1e, s1e, vhf, dm if h1e_kpts is None: h1e_kpts = mf.get_hcore() if vhf_kpts is None: vhf_kpts = mf.get_veff(mf.cell, dm_kpts) @@ -71,10 +72,10 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, if dm_kpts is None: dm_kpts = mf.make_rdm1() dm_sf = dm_kpts[0] + dm_kpts[1] - if 0 <= cycle < diis_start_cycle-1 and abs(damp_factor) > 1e-4: + if 0 <= cycle < diis_start_cycle-1 and abs(damp_factor) > 1e-4 and fock_last is not None: raise NotImplementedError('ROHF Fock-damping') if diis and cycle >= diis_start_cycle: - f_kpts = diis.update(s_kpts, dm_sf, f_kpts, mf, h1e_kpts, vhf_kpts) + f_kpts = diis.update(s_kpts, dm_sf, f_kpts, mf, h1e_kpts, vhf_kpts, f_prev=fock_last) if abs(level_shift_factor) > 1e-4: f_kpts = [mol_hf.level_shift(s, dm_sf[k]*.5, f_kpts[k], level_shift_factor) for k, s in 
enumerate(s_kpts)] diff --git a/pyscf/pbc/scf/kuhf.py b/pyscf/pbc/scf/kuhf.py index f0b077ad36..af56a2ced3 100644 --- a/pyscf/pbc/scf/kuhf.py +++ b/pyscf/pbc/scf/kuhf.py @@ -59,7 +59,8 @@ def make_dm(mos, occs): return lib.tag_array(dm, mo_coeff=mo_coeff_kpts, mo_occ=mo_occ_kpts) def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, - diis_start_cycle=None, level_shift_factor=None, damp_factor=None): + diis_start_cycle=None, level_shift_factor=None, damp_factor=None, + fock_last=None): h1e_kpts, s_kpts, vhf_kpts, dm_kpts = h1e, s1e, vhf, dm if h1e_kpts is None: h1e_kpts = mf.get_hcore() if vhf_kpts is None: vhf_kpts = mf.get_veff(mf.cell, dm_kpts) @@ -85,15 +86,15 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, else: dampa = dampb = damp_factor - if 0 <= cycle < diis_start_cycle-1 and abs(dampa)+abs(dampb) > 1e-4: + if 0 <= cycle < diis_start_cycle-1 and abs(dampa)+abs(dampb) > 1e-4 and fock_last is not None: f_a = [] f_b = [] - for k, s1e in enumerate(s_kpts): - f_a.append(mol_hf.damping(s1e, dm_kpts[0][k], f_kpts[0][k], dampa)) - f_b.append(mol_hf.damping(s1e, dm_kpts[1][k], f_kpts[1][k], dampb)) + for k in range(len(s_kpts)): + f_a.append(mol_hf.damping(f_kpts[0][k], fock_last[0][k], dampa)) + f_b.append(mol_hf.damping(f_kpts[1][k], fock_last[1][k], dampa)) f_kpts = [f_a, f_b] if diis and cycle >= diis_start_cycle: - f_kpts = diis.update(s_kpts, dm_kpts, f_kpts, mf, h1e_kpts, vhf_kpts) + f_kpts = diis.update(s_kpts, dm_kpts, f_kpts, mf, h1e_kpts, vhf_kpts, f_prev=fock_last) if abs(level_shift_factor) > 1e-4: f_kpts =([mol_hf.level_shift(s, dm_kpts[0,k], f_kpts[0,k], shifta) for k, s in enumerate(s_kpts)], diff --git a/pyscf/pbc/scf/test/test_khf.py b/pyscf/pbc/scf/test/test_khf.py index 643776713d..7d39168fee 100644 --- a/pyscf/pbc/scf/test/test_khf.py +++ b/pyscf/pbc/scf/test/test_khf.py @@ -292,23 +292,14 @@ def test_small_system(self): def test_damping(self): nao = cell.nao np.random.seed(1) - s = 
kmf.get_ovlp() - d = np.random.random((len(kpts),nao,nao)) - d = (d + d.transpose(0,2,1)) * 2 - vhf = 0 - f = khf.get_fock(kmf, kmf.get_hcore(), s, vhf, d, cycle=0, - diis_start_cycle=2, damp_factor=0.5) - self.assertAlmostEqual(np.linalg.norm(f[0]), 95.32749551722966, 6) - self.assertAlmostEqual(np.linalg.norm(f[1]), 73.9231303798864, 6) - self.assertAlmostEqual(np.linalg.norm(f[2]), 58.973290554565196, 6) - - vhf = np.zeros((2,len(kpts),nao,nao)) - d1 = np.asarray([d/2, d/2]) - f1 = kuhf.get_fock(kumf, kumf.get_hcore(), s, vhf, d1, cycle=0, - diis_start_cycle=2, damp_factor=0.5) + f = kmf.get_hcore() + df = np.random.rand(len(kpts),nao,nao) + f_prev = f + df + damp = 0.3 + f_damp = khf.get_fock(kmf, h1e=0, s1e=0, vhf=f, dm=0, cycle=0, + diis_start_cycle=2, damp_factor=damp, fock_last=f_prev) for k in range(len(kpts)): - self.assertAlmostEqual(abs(f[k] - f1[0,k]).max(), 0, 9) - self.assertAlmostEqual(abs(f[k] - f1[1,k]).max(), 0, 9) + self.assertAlmostEqual(abs(f_damp[k] - (f[k]*(1-damp) + f_prev[k]*damp)).max(), 0, 9) if __name__ == '__main__': print("Full Tests for pbc.scf.khf") diff --git a/pyscf/scf/addons.py b/pyscf/scf/addons.py index ae25c6874b..213d11721a 100644 --- a/pyscf/scf/addons.py +++ b/pyscf/scf/addons.py @@ -406,7 +406,8 @@ def dynamic_level_shift_(mf, factor=1.): old_get_fock = mf.get_fock mf._last_e = None def get_fock(h1e, s1e, vhf, dm, cycle=-1, diis=None, - diis_start_cycle=None, level_shift_factor=None, damp_factor=None): + diis_start_cycle=None, level_shift_factor=None, damp_factor=None, + fock_last=None): if cycle > 0 or diis is not None: if 'exc' in mf.scf_summary: # DFT e_tot = mf.scf_summary['e1'] + mf.scf_summary['coul'] + mf.scf_summary['exc'] @@ -417,7 +418,7 @@ def get_fock(h1e, s1e, vhf, dm, cycle=-1, diis=None, logger.info(mf, 'Set level shift to %g', level_shift_factor) mf._last_e = e_tot return old_get_fock(h1e, s1e, vhf, dm, cycle, diis, diis_start_cycle, - level_shift_factor, damp_factor) + level_shift_factor, damp_factor, 
fock_last=fock_last) mf.get_fock = get_fock return mf dynamic_level_shift = dynamic_level_shift_ diff --git a/pyscf/scf/diis.py b/pyscf/scf/diis.py index 9f273ed5a1..321f81cdfe 100644 --- a/pyscf/scf/diis.py +++ b/pyscf/scf/diis.py @@ -43,6 +43,7 @@ def __init__(self, mf=None, filename=None, Corth=None): self.rollback = 0 self.space = 8 self.Corth = Corth + self.damp = 0 #?self._scf = mf #?if hasattr(self._scf, 'get_orbsym'): # Symmetry adapted SCF objects #? self.orbsym = mf.get_orbsym(Corth) @@ -51,7 +52,11 @@ def __init__(self, mf=None, filename=None, Corth=None): def update(self, s, d, f, *args, **kwargs): errvec = get_err_vec(s, d, f, self.Corth) logger.debug1(self, 'diis-norm(errvec)=%g', numpy.linalg.norm(errvec)) - xnew = lib.diis.DIIS.update(self, f, xerr=errvec) + f_prev = kwargs.get('f_prev', None) + if abs(self.damp) < 1e-6 or f_prev is None: + xnew = lib.diis.DIIS.update(self, f, xerr=errvec) + else: + xnew = lib.diis.DIIS.update(self, f*(1-self.damp) + f_prev*self.damp, xerr=errvec) if self.rollback > 0 and len(self._bookkeep) == self.space: self._bookkeep = self._bookkeep[-self.rollback:] return xnew @@ -125,7 +130,7 @@ class EDIIS(lib.diis.DIIS): '''SCF-EDIIS Ref: JCP 116, 8255 (2002); DOI:10.1063/1.1470195 ''' - def update(self, s, d, f, mf, h1e, vhf): + def update(self, s, d, f, mf, h1e, vhf, *args, **kwargs): if self._head >= self.space: self._head = 0 if not self._buffer: @@ -185,7 +190,7 @@ class ADIIS(lib.diis.DIIS): ''' Ref: JCP 132, 054109 (2010); DOI:10.1063/1.3304922 ''' - def update(self, s, d, f, mf, h1e, vhf): + def update(self, s, d, f, mf, h1e, vhf, *args, **kwargs): if self._head >= self.space: self._head = 0 if not self._buffer: diff --git a/pyscf/scf/hf.py b/pyscf/scf/hf.py index 9f559265b5..b6ecb5ace0 100644 --- a/pyscf/scf/hf.py +++ b/pyscf/scf/hf.py @@ -149,6 +149,7 @@ def kernel(mf, conv_tol=1e-10, conv_tol_grad=None, mf_diis = mf.DIIS(mf, mf.diis_file) mf_diis.space = mf.diis_space mf_diis.rollback = mf.diis_space_rollback + 
mf_diis.damp = mf.diis_damp # We get the used orthonormalized AO basis from any old eigendecomposition. # Since the ingredients for the Fock matrix has already been built, we can @@ -166,12 +167,13 @@ def kernel(mf, conv_tol=1e-10, conv_tol_grad=None, # A preprocessing hook before the SCF iteration mf.pre_kernel(locals()) + fock_last = None cput1 = logger.timer(mf, 'initialize scf', *cput0) for cycle in range(mf.max_cycle): dm_last = dm last_hf_e = e_tot - fock = mf.get_fock(h1e, s1e, vhf, dm, cycle, mf_diis) + fock = mf.get_fock(h1e, s1e, vhf, dm, cycle, mf_diis, fock_last=fock_last) mo_energy, mo_coeff = mf.eig(fock, s1e) mo_occ = mf.get_occ(mo_energy, mo_coeff) dm = mf.make_rdm1(mo_coeff, mo_occ) @@ -181,6 +183,7 @@ def kernel(mf, conv_tol=1e-10, conv_tol_grad=None, # Here Fock matrix is h1e + vhf, without DIIS. Calling get_fock # instead of the statement "fock = h1e + vhf" because Fock matrix may # be modified in some methods. + fock_last = fock fock = mf.get_fock(h1e, s1e, vhf, dm) # = h1e + vhf, no DIIS norm_gorb = numpy.linalg.norm(mf.get_grad(mo_coeff, mo_occ, fock)) if not TIGHT_GRAD_CONV_TOL: @@ -753,14 +756,8 @@ def level_shift(s, d, f, factor): return f + dm_vir * factor -def damping(s, d, f, factor): - #dm_vir = s - reduce(numpy.dot, (s,d,s)) - #sinv = numpy.linalg.inv(s) - #f0 = reduce(numpy.dot, (dm_vir, sinv, f, d, s)) - dm_vir = numpy.eye(s.shape[0]) - numpy.dot(s, d) - f0 = reduce(numpy.dot, (dm_vir, f, d, s)) - f0 = (f0+f0.conj().T) * (factor/(factor+1.)) - return f - f0 +def damping(f, f_prev, factor): + return f*(1-factor) + f_prev*factor # full density matrix for RHF @@ -990,7 +987,8 @@ def get_veff(mol, dm, dm_last=None, vhf_last=None, hermi=1, vhfopt=None): return vj - vk * .5 + numpy.asarray(vhf_last) def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, - diis_start_cycle=None, level_shift_factor=None, damp_factor=None): + diis_start_cycle=None, level_shift_factor=None, damp_factor=None, + fock_last=None): '''F = 
h^{core} + V^{HF} Special treatment (damping, DIIS, or level shift) will be applied to the @@ -1030,10 +1028,10 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, if s1e is None: s1e = mf.get_ovlp() if dm is None: dm = mf.make_rdm1() - if 0 <= cycle < diis_start_cycle-1 and abs(damp_factor) > 1e-4: - f = damping(s1e, dm*.5, f, damp_factor) + if 0 <= cycle < diis_start_cycle-1 and abs(damp_factor) > 1e-4 and fock_last is not None: + f = damping(f, fock_last, damp_factor) if diis is not None and cycle >= diis_start_cycle: - f = diis.update(s1e, dm, f, mf, h1e, vhf) + f = diis.update(s1e, dm, f, mf, h1e, vhf, f_prev=fock_last) if abs(level_shift_factor) > 1e-4: f = level_shift(s1e, dm*.5, f, level_shift_factor) return f @@ -1463,6 +1461,8 @@ class SCF(lib.StreamObject): vector) will be reused. diis_space : int DIIS space size. By default, 8 Fock matrices and errors vector are stored. + diis_damp : float + DIIS damping factor. Default is 0. diis_start_cycle : int The step to start DIIS. Default is 1. 
diis_file: 'str' @@ -1515,6 +1515,7 @@ class SCF(lib.StreamObject): DIIS = diis.SCF_DIIS diis = getattr(__config__, 'scf_hf_SCF_diis', True) diis_space = getattr(__config__, 'scf_hf_SCF_diis_space', 8) + diis_damp = getattr(__config__, 'scf_hf_SCF_diis_damp', 0) # need > 0 if initial DM is numpy.zeros array diis_start_cycle = getattr(__config__, 'scf_hf_SCF_diis_start_cycle', 1) diis_file = None @@ -1530,7 +1531,7 @@ class SCF(lib.StreamObject): _keys = { 'conv_tol', 'conv_tol_grad', 'max_cycle', 'init_guess', - 'DIIS', 'diis', 'diis_space', 'diis_start_cycle', + 'DIIS', 'diis', 'diis_space', 'diis_damp', 'diis_start_cycle', 'diis_file', 'diis_space_rollback', 'damp', 'level_shift', 'direct_scf', 'direct_scf_tol', 'conv_check', 'callback', 'mol', 'chkfile', 'mo_energy', 'mo_coeff', 'mo_occ', @@ -1597,10 +1598,13 @@ def dump_flags(self, verbose=None): log.info('DIIS = %s', self.diis) log.info('diis_start_cycle = %d', self.diis_start_cycle) log.info('diis_space = %d', self.diis.space) + if getattr(self.diis, 'damp', None): + log.info('diis_damp = %g', self.diis.damp) elif self.diis: log.info('DIIS = %s', self.DIIS) log.info('diis_start_cycle = %d', self.diis_start_cycle) log.info('diis_space = %d', self.diis_space) + log.info('diis_damp = %g', self.diis_damp) else: log.info('DIIS disabled') log.info('SCF conv_tol = %g', self.conv_tol) diff --git a/pyscf/scf/rohf.py b/pyscf/scf/rohf.py index 6be51ebcd7..951e08a526 100644 --- a/pyscf/scf/rohf.py +++ b/pyscf/scf/rohf.py @@ -73,7 +73,8 @@ def init_guess_by_chkfile(mol, chkfile_name, project=None): return lib.tag_array(dm, mo_coeff=mo_coeff, mo_occ=mo_occ) def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, - diis_start_cycle=None, level_shift_factor=None, damp_factor=None): + diis_start_cycle=None, level_shift_factor=None, damp_factor=None, + fock_last=None): '''Build fock matrix based on Roothaan's effective fock. 
See also :func:`get_roothaan_fock` ''' @@ -100,10 +101,10 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, damp_factor = mf.damp dm_tot = dm[0] + dm[1] - if 0 <= cycle < diis_start_cycle-1 and abs(damp_factor) > 1e-4: + if 0 <= cycle < diis_start_cycle-1 and abs(damp_factor) > 1e-4 and fock_last is not None: raise NotImplementedError('ROHF Fock-damping') if diis and cycle >= diis_start_cycle: - f = diis.update(s1e, dm_tot, f, mf, h1e, vhf) + f = diis.update(s1e, dm_tot, f, mf, h1e, vhf, f_prev=fock_last) if abs(level_shift_factor) > 1e-4: f = hf.level_shift(s1e, dm_tot*.5, f, level_shift_factor) f = lib.tag_array(f, focka=focka, fockb=fockb) diff --git a/pyscf/scf/test/test_rhf.py b/pyscf/scf/test/test_rhf.py index 200a495e6e..5690e466f6 100644 --- a/pyscf/scf/test/test_rhf.py +++ b/pyscf/scf/test/test_rhf.py @@ -416,11 +416,14 @@ def test_nr_rohf(self): def test_damping(self): nao = mol.nao_nr() numpy.random.seed(1) - s = scf.hf.get_ovlp(mol) - d = numpy.random.random((nao,nao)) - d = d + d.T - f = scf.hf.damping(s, d, scf.hf.get_hcore(mol), .5) - self.assertAlmostEqual(numpy.linalg.norm(f), 23361.854064083178, 9) + f = scf.hf.get_hcore(mol) + df = numpy.random.rand(nao,nao) + df += df.T + f_prev = f + df + damp = 0.3 + f_damp = scf.hf.get_fock(mf, h1e=0, s1e=0, vhf=f, dm=0, cycle=0, + diis_start_cycle=2, damp_factor=damp, fock_last=f_prev) + self.assertAlmostEqual(abs(f_damp - (f*(1-damp) + f_prev*damp)).max(), 0, 9) def test_level_shift(self): nao = mol.nao_nr() diff --git a/pyscf/scf/test/test_uhf.py b/pyscf/scf/test/test_uhf.py index fc7607d3d5..36c84e28cf 100644 --- a/pyscf/scf/test/test_uhf.py +++ b/pyscf/scf/test/test_uhf.py @@ -399,14 +399,14 @@ def test_get_occ_extreme_case(self): def test_damping(self): nao = mol.nao_nr() numpy.random.seed(1) - s = scf.hf.get_ovlp(mol) - d = numpy.random.random((nao,nao)) - d = (d + d.T) * 2 - vhf = 0 - f = scf.uhf.get_fock(mf, scf.hf.get_hcore(mol), s, vhf, d, cycle=0, - 
diis_start_cycle=2, damp_factor=0.5) - self.assertAlmostEqual(numpy.linalg.norm(f[0]), 23361.854064083178, 9) - self.assertAlmostEqual(numpy.linalg.norm(f[1]), 23361.854064083178, 9) + f = numpy.asarray([scf.hf.get_hcore(mol)]*2) + df = numpy.random.rand(2,nao,nao) + f_prev = f + df + damp = 0.3 + f_damp = scf.uhf.get_fock(mf, h1e=0, s1e=0, vhf=f, dm=0, cycle=0, + diis_start_cycle=2, damp_factor=damp, fock_last=f_prev) + self.assertAlmostEqual(abs(f_damp[0] - (f[0]*(1-damp) + f_prev[0]*damp)).max(), 0, 9) + self.assertAlmostEqual(abs(f_damp[1] - (f[1]*(1-damp) + f_prev[1]*damp)).max(), 0, 9) def test_get_irrep_nelec(self): fock = n2mf.get_fock() diff --git a/pyscf/scf/uhf.py b/pyscf/scf/uhf.py index 2200c5e0e0..4f07335bd6 100644 --- a/pyscf/scf/uhf.py +++ b/pyscf/scf/uhf.py @@ -236,7 +236,8 @@ def get_veff(mol, dm, dm_last=0, vhf_last=0, hermi=1, vhfopt=None): return vhf def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, - diis_start_cycle=None, level_shift_factor=None, damp_factor=None): + diis_start_cycle=None, level_shift_factor=None, damp_factor=None, + fock_last=None): if h1e is None: h1e = mf.get_hcore() if vhf is None: vhf = mf.get_veff(mf.mol, dm) f = numpy.asarray(h1e) + vhf @@ -265,11 +266,11 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, if isinstance(dm, numpy.ndarray) and dm.ndim == 2: dm = [dm*.5] * 2 - if 0 <= cycle < diis_start_cycle-1 and abs(dampa)+abs(dampb) > 1e-4: - f = (hf.damping(s1e, dm[0], f[0], dampa), - hf.damping(s1e, dm[1], f[1], dampb)) + if 0 <= cycle < diis_start_cycle-1 and abs(dampa)+abs(dampb) > 1e-4 and fock_last is not None: + f = (hf.damping(f[0], fock_last[0], dampa), + hf.damping(f[1], fock_last[1], dampa)) if diis and cycle >= diis_start_cycle: - f = diis.update(s1e, dm, f, mf, h1e, vhf) + f = diis.update(s1e, dm, f, mf, h1e, vhf, f_prev=fock_last) if abs(shifta)+abs(shiftb) > 1e-4: f = (hf.level_shift(s1e, dm[0], f[0], shifta), hf.level_shift(s1e, dm[1], f[1], 
shiftb)) diff --git a/pyscf/solvent/_attach_solvent.py b/pyscf/solvent/_attach_solvent.py index ab5d58dc7c..1f98b9677d 100644 --- a/pyscf/solvent/_attach_solvent.py +++ b/pyscf/solvent/_attach_solvent.py @@ -92,14 +92,15 @@ def get_veff(self, mol=None, dm=None, *args, **kwargs): def get_fock(self, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, diis_start_cycle=None, - level_shift_factor=None, damp_factor=None): + level_shift_factor=None, damp_factor=None, fock_last=None): # DIIS was called inside super().get_fock. v_solvent, as a function of # dm, should be extrapolated as well. To enable it, v_solvent has to be # added to the fock matrix before DIIS was called. if getattr(vhf, 'v_solvent', None) is None: vhf = self.get_veff(self.mol, dm) return super().get_fock(h1e, s1e, vhf+vhf.v_solvent, dm, cycle, diis, - diis_start_cycle, level_shift_factor, damp_factor) + diis_start_cycle, level_shift_factor, damp_factor, + fock_last) def energy_elec(self, dm=None, h1e=None, vhf=None): if dm is None: From c2d539e190c26a2e3ef8b905c6aac27ec8216a82 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Tue, 14 Nov 2023 13:50:35 -0800 Subject: [PATCH 07/44] Add level shift to CPHF solver --- pyscf/hessian/rhf.py | 19 +++++-- pyscf/hessian/test/test_rhf.py | 12 +++++ pyscf/hessian/test/test_uhf.py | 9 ++++ pyscf/hessian/uhf.py | 9 ++-- pyscf/lib/linalg_helper.py | 4 +- pyscf/lib/test/test_linalg_helper.py | 18 +++++++ pyscf/scf/cphf.py | 56 +++++++++++++------- pyscf/scf/ucphf.py | 76 +++++++++++++++++----------- 8 files changed, 146 insertions(+), 57 deletions(-) diff --git a/pyscf/hessian/rhf.py b/pyscf/hessian/rhf.py index 90e7db492d..a11fc9d7ff 100644 --- a/pyscf/hessian/rhf.py +++ b/pyscf/hessian/rhf.py @@ -296,7 +296,8 @@ def _get_jk(mol, intor, comp, aosym, script_dms, return vs def solve_mo1(mf, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, - fx=None, atmlst=None, max_memory=4000, verbose=None, max_cycle=50): + fx=None, atmlst=None, max_memory=4000, verbose=None, + 
max_cycle=50, level_shift=0): '''Solve the first order equation Kwargs: @@ -343,7 +344,8 @@ def _ao2mo(mat): h1vo = numpy.vstack(h1vo) s1vo = numpy.vstack(s1vo) - mo1, e1 = cphf.solve(fx, mo_energy, mo_occ, h1vo, s1vo, max_cycle=max_cycle) + mo1, e1 = cphf.solve(fx, mo_energy, mo_occ, h1vo, s1vo, + max_cycle=max_cycle, level_shift=level_shift) mo1 = numpy.einsum('pq,xqi->xpi', mo_coeff, mo1).reshape(-1,3,nao,nocc) e1 = e1.reshape(-1,3,nocc,nocc) @@ -470,8 +472,15 @@ def h_op(x): class HessianBase(lib.StreamObject): '''Non-relativistic restricted Hartree-Fock hessian''' + # Max. number of iterations for Krylov solver + max_cycle = 50 + # Shift virtual orbitals to slightly improve the convergence speed of Krylov solver + # A small level_shift ~ 0.1 is often helpful to decrease 2 - 3 iterations + # while the error of cphf solver may be increased by one magnitude. + level_shift = 0 + _keys = { - 'mol', 'base', 'chkfile', 'atmlst', 'de', 'max_cycle' + 'mol', 'base', 'chkfile', 'atmlst', 'de', 'max_cycle', 'level_shift' } def __init__(self, scf_method): @@ -481,7 +490,6 @@ def __init__(self, scf_method): self.base = scf_method self.chkfile = scf_method.chkfile self.max_memory = self.mol.max_memory - self.max_cycle = 50 self.atmlst = range(self.mol.natm) self.de = numpy.zeros((0,0,3,3)) # (A,B,dR_A,dR_B) @@ -566,7 +574,8 @@ def get_hcore(iatm, jatm): def solve_mo1(self, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, fx=None, atmlst=None, max_memory=4000, verbose=None): return solve_mo1(self.base, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, - fx, atmlst, max_memory, verbose, max_cycle=self.max_cycle) + fx, atmlst, max_memory, verbose, + max_cycle=self.max_cycle, level_shift=self.level_shift) def hess_nuc(self, mol=None, atmlst=None): if mol is None: mol = self.mol diff --git a/pyscf/hessian/test/test_rhf.py b/pyscf/hessian/test/test_rhf.py index e1ae1f7087..7f3bfdb2f9 100644 --- a/pyscf/hessian/test/test_rhf.py +++ b/pyscf/hessian/test/test_rhf.py @@ -35,6 +35,18 @@ def 
tearDownModule(): del mol class KnownValues(unittest.TestCase): + def test_rhf_hess(self): + mf = scf.RHF(mol) + e0 = mf.kernel() + hess = hessian.RHF(mf).kernel() + self.assertAlmostEqual(lib.fp(hess), -0.7816352153153946, 6) + + hobj = hessian.RHF(mf) + hobj.max_cycle = 10 + hobj.level_shift = .1 + hess = hobj.kernel() + self.assertAlmostEqual(lib.fp(hess), -0.7816352153153946, 6) + def test_finite_diff_x2c_rhf_hess(self): mf = scf.RHF(mol).x2c() mf.conv_tol = 1e-14 diff --git a/pyscf/hessian/test/test_uhf.py b/pyscf/hessian/test/test_uhf.py index eb2622d10b..06d32b38ad 100644 --- a/pyscf/hessian/test/test_uhf.py +++ b/pyscf/hessian/test/test_uhf.py @@ -36,6 +36,15 @@ def tearDownModule(): del mol class KnownValues(unittest.TestCase): + def test_uhf_hess(self): + mf = scf.UHF(mol) + mf.conv_tol = 1e-14 + e0 = mf.kernel() + hobj = mf.Hessian() + hobj.level_shift = .05 + hess = hobj.kernel() + self.assertAlmostEqual(lib.fp(hess), -0.20243405976628576, 5) + def test_finite_diff_rhf_hess(self): mf = scf.UHF(mol) mf.conv_tol = 1e-14 diff --git a/pyscf/hessian/uhf.py b/pyscf/hessian/uhf.py index 1be5ccc587..1b30e264ee 100644 --- a/pyscf/hessian/uhf.py +++ b/pyscf/hessian/uhf.py @@ -257,7 +257,8 @@ def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): return chkfile def solve_mo1(mf, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, - fx=None, atmlst=None, max_memory=4000, verbose=None): + fx=None, atmlst=None, max_memory=4000, verbose=None, + max_cycle=50, level_shift=0): mol = mf.mol if atmlst is None: atmlst = range(mol.natm) @@ -306,7 +307,8 @@ def _ao2mo(mat, mo_coeff, mocc): h1vo = (numpy.vstack(h1voa), numpy.vstack(h1vob)) s1vo = (numpy.vstack(s1voa), numpy.vstack(s1vob)) - mo1, e1 = ucphf.solve(fx, mo_energy, mo_occ, h1vo, s1vo) + mo1, e1 = ucphf.solve(fx, mo_energy, mo_occ, h1vo, s1vo, + max_cycle=max_cycle, level_shift=level_shift) mo1a = numpy.einsum('pq,xqi->xpi', mo_coeff[0], mo1[0]).reshape(-1,3,nao,nocca) mo1b = 
numpy.einsum('pq,xqi->xpi', mo_coeff[1], mo1[1]).reshape(-1,3,nao,noccb) e1a = e1[0].reshape(-1,3,nocca,nocca) @@ -449,7 +451,8 @@ class Hessian(rhf_hess.HessianBase): def solve_mo1(self, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, fx=None, atmlst=None, max_memory=4000, verbose=None): return solve_mo1(self.base, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, - fx, atmlst, max_memory, verbose) + fx, atmlst, max_memory, verbose, + max_cycle=self.max_cycle, level_shift=self.level_shift) def to_gpu(self): raise NotImplementedError diff --git a/pyscf/lib/linalg_helper.py b/pyscf/lib/linalg_helper.py index fe2892364c..1fcc1a9265 100644 --- a/pyscf/lib/linalg_helper.py +++ b/pyscf/lib/linalg_helper.py @@ -1290,9 +1290,9 @@ def krylov(aop, b, x0=None, tol=1e-10, max_cycle=30, dot=numpy.dot, >>> from pyscf import lib >>> a = numpy.random.random((10,10)) * 1e-2 >>> b = numpy.random.random(10) - >>> aop = lambda x: numpy.dot(a,x) + >>> aop = lambda x: a.dot(x.T).T >>> x = lib.krylov(aop, b) - >>> numpy.allclose(numpy.dot(a,x)+x, b) + >>> numpy.allclose(aop(x)+x, b) True ''' if isinstance(aop, numpy.ndarray) and aop.ndim == 2: diff --git a/pyscf/lib/test/test_linalg_helper.py b/pyscf/lib/test/test_linalg_helper.py index 6cc7e463c4..6c8c57f56f 100644 --- a/pyscf/lib/test/test_linalg_helper.py +++ b/pyscf/lib/test/test_linalg_helper.py @@ -116,6 +116,24 @@ def precond(x, *args): x1 = linalg_helper.krylov(aop, b/a_diag, x1, max_cycle=30) self.assertAlmostEqual(abs(xref - x1).max(), 0, 6) + def test_krylov_with_level_shift(self): + numpy.random.seed(10) + n = 100 + a = numpy.random.rand(n,n) * .1 + a = a.dot(a.T) + a_diag = numpy.random.rand(n) + b = numpy.random.rand(n) + ref = numpy.linalg.solve(numpy.diag(a_diag) + a, b) + + #((diag+shift) + (a-shift)) x = b + shift = .1 + a_diag += shift + a -= numpy.eye(n)*shift + + aop = lambda x: (a.dot(x.T).T/a_diag) + c = linalg_helper.krylov(aop, b/a_diag, max_cycle=18) + self.assertAlmostEqual(abs(ref - c).max(), 0, 9) + def 
test_dgeev(self): numpy.random.seed(12) n = 100 diff --git a/pyscf/scf/cphf.py b/pyscf/scf/cphf.py index 19e2d2d10b..73cbdc2010 100644 --- a/pyscf/scf/cphf.py +++ b/pyscf/scf/cphf.py @@ -27,7 +27,8 @@ def solve(fvind, mo_energy, mo_occ, h1, s1=None, - max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN): + max_cycle=50, tol=1e-9, hermi=False, verbose=logger.WARN, + level_shift=0): ''' Args: fvind : function @@ -36,29 +37,43 @@ def solve(fvind, mo_energy, mo_occ, h1, s1=None, Kwargs: hermi : boolean Whether the matrix defined by fvind is Hermitian or not. + level_shift : float + Add to diagonal terms to slightly improve the convergence speed of + Krylov solver ''' if s1 is None: return solve_nos1(fvind, mo_energy, mo_occ, h1, - max_cycle, tol, hermi, verbose) + max_cycle, tol, hermi, verbose, level_shift) else: return solve_withs1(fvind, mo_energy, mo_occ, h1, s1, - max_cycle, tol, hermi, verbose) + max_cycle, tol, hermi, verbose, level_shift) kernel = solve # h1 shape is (:,nvir,nocc) def solve_nos1(fvind, mo_energy, mo_occ, h1, - max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN): - '''For field independent basis. First order overlap matrix is zero''' + max_cycle=50, tol=1e-9, hermi=False, verbose=logger.WARN, + level_shift=0): + '''For field independent basis. 
First order overlap matrix is zero + + Kwargs: + level_shift : float + Add to diagonal terms to slightly improve the convergence speed of + Krylov solver + ''' + assert not hermi log = logger.new_logger(verbose=verbose) t0 = (logger.process_clock(), logger.perf_counter()) e_a = mo_energy[mo_occ==0] e_i = mo_energy[mo_occ>0] - e_ai = 1 / lib.direct_sum('a-i->ai', e_a, e_i) + e_ai = 1 / (e_a[:,None] + level_shift - e_i) mo1base = h1 * -e_ai def vind_vo(mo1): - v = fvind(mo1.reshape(h1.shape)).reshape(h1.shape) + mo1 = mo1.reshape(h1.shape) + v = fvind(mo1).reshape(h1.shape) + if level_shift != 0: + v -= mo1 * level_shift v *= e_ai return v.ravel() mo1 = lib.krylov(vind_vo, mo1base.ravel(), @@ -68,20 +83,23 @@ def vind_vo(mo1): # h1 shape is (:,nocc+nvir,nocc) def solve_withs1(fvind, mo_energy, mo_occ, h1, s1, - max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN): + max_cycle=50, tol=1e-9, hermi=False, verbose=logger.WARN, + level_shift=0): '''For field dependent basis. First order overlap matrix is non-zero. The first order orbitals are set to C^1_{ij} = -1/2 S1 e1 = h1 - s1*e0 + (e0_j-e0_i)*c1 + vhf[c1] Kwargs: - hermi : boolean - Whether the matrix defined by fvind is Hermitian or not. 
+ level_shift : float + Add to diagonal terms to slightly improve the convergence speed of + Krylov solver Returns: First order orbital coefficients (in MO basis) and first order orbital energy matrix ''' + assert not hermi log = logger.new_logger(verbose=verbose) t0 = (logger.process_clock(), logger.perf_counter()) @@ -89,34 +107,38 @@ def solve_withs1(fvind, mo_energy, mo_occ, h1, s1, viridx = mo_occ == 0 e_a = mo_energy[viridx] e_i = mo_energy[occidx] - e_ai = 1 / lib.direct_sum('a-i->ai', e_a, e_i) + e_ai = 1 / (e_a[:,None] + level_shift - e_i) nvir, nocc = e_ai.shape nmo = nocc + nvir s1 = s1.reshape(-1,nmo,nocc) hs = mo1base = h1.reshape(-1,nmo,nocc) - s1*e_i - mo_e1 = hs[:,occidx,:].copy() + mo1base = hs.copy() mo1base[:,viridx] *= -e_ai mo1base[:,occidx] = -s1[:,occidx] * .5 def vind_vo(mo1): - v = fvind(mo1.reshape(h1.shape)).reshape(-1,nmo,nocc) + mo1 = mo1.reshape(mo1base.shape) + v = fvind(mo1).reshape(mo1base.shape) + if level_shift != 0: + v -= mo1 * level_shift v[:,viridx,:] *= e_ai v[:,occidx,:] = 0 return v.ravel() mo1 = lib.krylov(vind_vo, mo1base.ravel(), tol=tol, max_cycle=max_cycle, hermi=hermi, verbose=log) mo1 = mo1.reshape(mo1base.shape) + mo1[:,occidx] = mo1base[:,occidx] log.timer('krylov solver in CPHF', *t0) - v1mo = fvind(mo1.reshape(h1.shape)).reshape(-1,nmo,nocc) - mo1[:,viridx] = mo1base[:,viridx] - v1mo[:,viridx]*e_ai + hs += fvind(mo1).reshape(mo1base.shape) + mo1[:,viridx] = hs[:,viridx] / (e_i - e_a[:,None]) # mo_e1 has the same symmetry as the first order Fock matrix (hermitian or # anti-hermitian). 
mo_e1 = v1mo - s1*lib.direct_sum('i+j->ij',e_i,e_i) - mo_e1 += mo1[:,occidx] * lib.direct_sum('i-j->ij', e_i, e_i) - mo_e1 += v1mo[:,occidx,:] + mo_e1 = hs[:,occidx,:] + mo_e1 += mo1[:,occidx] * (e_i[:,None] - e_i) if h1.ndim == 3: return mo1, mo_e1 diff --git a/pyscf/scf/ucphf.py b/pyscf/scf/ucphf.py index cf4a7e641a..f23442452a 100644 --- a/pyscf/scf/ucphf.py +++ b/pyscf/scf/ucphf.py @@ -27,7 +27,8 @@ def solve(fvind, mo_energy, mo_occ, h1, s1=None, - max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN): + max_cycle=50, tol=1e-9, hermi=False, verbose=logger.WARN, + level_shift=0): ''' Args: fvind : function @@ -35,16 +36,18 @@ def solve(fvind, mo_energy, mo_occ, h1, s1=None, ''' if s1 is None: return solve_nos1(fvind, mo_energy, mo_occ, h1, - max_cycle, tol, hermi, verbose) + max_cycle, tol, hermi, verbose, level_shift) else: return solve_withs1(fvind, mo_energy, mo_occ, h1, s1, - max_cycle, tol, hermi, verbose) + max_cycle, tol, hermi, verbose, level_shift) kernel = solve # h1 shape is (:,nvir,nocc) def solve_nos1(fvind, mo_energy, mo_occ, h1, - max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN): + max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN, + level_shift=0): '''For field independent basis. 
First order overlap matrix is zero''' + assert not hermi log = logger.new_logger(verbose=verbose) t0 = (logger.process_clock(), logger.perf_counter()) @@ -56,15 +59,20 @@ def solve_nos1(fvind, mo_energy, mo_occ, h1, noccb = numpy.count_nonzero(occidxb) nvira = mo_occ[0].size - nocca nvirb = mo_occ[1].size - noccb - e_ai = numpy.hstack(((mo_energy[0][viridxa,None]-mo_energy[0][occidxa]).ravel(), - (mo_energy[1][viridxb,None]-mo_energy[1][occidxb]).ravel())) + mo_ea, mo_eb = mo_energy + e_ai = numpy.hstack( + ((mo_ea[viridxa,None]+level_shift - mo_ea[occidxa]).ravel(), + (mo_eb[viridxb,None]+level_shift - mo_eb[occidxb]).ravel())) e_ai = 1 / e_ai mo1base = numpy.hstack((h1[0].reshape(-1,nvira*nocca), h1[1].reshape(-1,nvirb*noccb))) mo1base *= -e_ai def vind_vo(mo1): - v = fvind(mo1.reshape(mo1base.shape)).reshape(mo1base.shape) + mo1 = mo1.reshape(mo1base.shape) + v = fvind(mo1).reshape(mo1base.shape) + if level_shift != 0: + v -= mo1 * level_shift v *= e_ai return v.ravel() mo1 = lib.krylov(vind_vo, mo1base.ravel(), @@ -83,12 +91,14 @@ def vind_vo(mo1): # h1 shape is (:,nvir+nocc,nocc) def solve_withs1(fvind, mo_energy, mo_occ, h1, s1, - max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN): + max_cycle=20, tol=1e-9, hermi=False, verbose=logger.WARN, + level_shift=0): '''For field dependent basis. First order overlap matrix is non-zero. 
The first order orbitals are set to C^1_{ij} = -1/2 S1 e1 = h1 - s1*e0 + (e0_j-e0_i)*c1 + vhf[c1] ''' + assert not hermi log = logger.new_logger(verbose=verbose) t0 = (logger.process_clock(), logger.perf_counter()) @@ -99,27 +109,31 @@ def solve_withs1(fvind, mo_energy, mo_occ, h1, s1, nocca = numpy.count_nonzero(occidxa) noccb = numpy.count_nonzero(occidxb) nmoa, nmob = mo_occ[0].size, mo_occ[1].size - eai_a = mo_energy[0][viridxa,None] - mo_energy[0][occidxa] - eai_b = mo_energy[1][viridxb,None] - mo_energy[1][occidxb] + ei_a = mo_energy[0][occidxa] + ei_b = mo_energy[1][occidxb] + ea_a = mo_energy[0][viridxa] + ea_b = mo_energy[1][viridxb] + eai_a = 1. / (ea_a[:,None] + level_shift - ei_a) + eai_b = 1. / (ea_b[:,None] + level_shift - ei_b) s1_a = s1[0].reshape(-1,nmoa,nocca) nset = s1_a.shape[0] s1_b = s1[1].reshape(nset,nmob,noccb) - hs_a = mo1base_a = h1[0].reshape(nset,nmoa,nocca) - s1_a * mo_energy[0][occidxa] - hs_b = mo1base_b = h1[1].reshape(nset,nmob,noccb) - s1_b * mo_energy[1][occidxb] - mo_e1_a = hs_a[:,occidxa].copy() - mo_e1_b = hs_b[:,occidxb].copy() + hs_a = h1[0].reshape(nset,nmoa,nocca) - s1_a * ei_a + hs_b = h1[1].reshape(nset,nmob,noccb) - s1_b * ei_b - mo1base_a[:,viridxa]/= -eai_a - mo1base_b[:,viridxb]/= -eai_b + mo1base_a = hs_a.copy() + mo1base_b = hs_b.copy() + mo1base_a[:,viridxa] *= -eai_a + mo1base_b[:,viridxb] *= -eai_b mo1base_a[:,occidxa] = -s1_a[:,occidxa] * .5 mo1base_b[:,occidxb] = -s1_b[:,occidxb] * .5 - - eai_a = 1. / eai_a - eai_b = 1. 
/ eai_b mo1base = numpy.hstack((mo1base_a.reshape(nset,-1), mo1base_b.reshape(nset,-1))) def vind_vo(mo1): + mo1 = mo1.reshape(mo1base.shape) v = fvind(mo1).reshape(mo1base.shape) + if level_shift != 0: + v -= mo1 * level_shift v1a = v[:,:nmoa*nocca].reshape(nset,nmoa,nocca) v1b = v[:,nmoa*nocca:].reshape(nset,nmob,noccb) v1a[:,viridxa] *= eai_a @@ -129,21 +143,23 @@ def vind_vo(mo1): return v.ravel() mo1 = lib.krylov(vind_vo, mo1base.ravel(), tol=tol, max_cycle=max_cycle, hermi=hermi, verbose=log) - log.timer('krylov solver in CPHF', *t0) - - v1mo = fvind(mo1).reshape(mo1base.shape) - v1a = v1mo[:,:nmoa*nocca].reshape(nset,nmoa,nocca) - v1b = v1mo[:,nmoa*nocca:].reshape(nset,nmob,noccb) mo1 = mo1.reshape(mo1base.shape) mo1_a = mo1[:,:nmoa*nocca].reshape(nset,nmoa,nocca) mo1_b = mo1[:,nmoa*nocca:].reshape(nset,nmob,noccb) - mo1_a[:,viridxa] = mo1base_a[:,viridxa] - v1a[:,viridxa] * eai_a - mo1_b[:,viridxb] = mo1base_b[:,viridxb] - v1b[:,viridxb] * eai_b + mo1_a[:,occidxa] = mo1base_a[:,occidxa] + mo1_b[:,occidxb] = mo1base_b[:,occidxb] + log.timer('krylov solver in CPHF', *t0) - mo_e1_a += mo1_a[:,occidxa] * (mo_energy[0][occidxa,None] - mo_energy[0][occidxa]) - mo_e1_b += mo1_b[:,occidxb] * (mo_energy[1][occidxb,None] - mo_energy[1][occidxb]) - mo_e1_a += v1mo[:,:nmoa*nocca].reshape(nset,nmoa,nocca)[:,occidxa] - mo_e1_b += v1mo[:,nmoa*nocca:].reshape(nset,nmob,noccb)[:,occidxb] + v1mo = fvind(mo1).reshape(mo1base.shape) + hs_a += v1mo[:,:nmoa*nocca].reshape(nset,nmoa,nocca) + hs_b += v1mo[:,nmoa*nocca:].reshape(nset,nmob,noccb) + mo1_a[:,viridxa] = hs_a[:,viridxa] / (ei_a - ea_a[:,None]) + mo1_b[:,viridxb] = hs_b[:,viridxb] / (ei_b - ea_b[:,None]) + + mo_e1_a = hs_a[:,occidxa] + mo_e1_b = hs_b[:,occidxb] + mo_e1_a += mo1_a[:,occidxa] * (ei_a[:,None] - ei_a) + mo_e1_b += mo1_b[:,occidxb] * (ei_b[:,None] - ei_b) if isinstance(h1[0], numpy.ndarray) and h1[0].ndim == 2: mo1_a, mo1_b = mo1_a[0], mo1_b[0] From 63ce7fd786dc329df32896255cf415dbd7841723 Mon Sep 17 00:00:00 
2001 From: Xing Zhang Date: Sun, 4 Feb 2024 19:31:46 -0800 Subject: [PATCH 08/44] fix cp2k basis parse (issue #2043) (#2045) * fix issue #2043 * add test for parse_cp2k * allow symbol choosing for GTH PP parser --- pyscf/gto/basis/parse_cp2k.py | 21 ++++++---- pyscf/gto/basis/parse_cp2k_pp.py | 23 +++++++--- pyscf/gto/test/test_basis_parser.py | 65 +++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 13 deletions(-) diff --git a/pyscf/gto/basis/parse_cp2k.py b/pyscf/gto/basis/parse_cp2k.py index bc2b534fe3..bf624140ca 100644 --- a/pyscf/gto/basis/parse_cp2k.py +++ b/pyscf/gto/basis/parse_cp2k.py @@ -23,13 +23,14 @@ import re from pyscf.lib.exceptions import BasisNotFoundError from pyscf.gto.basis import parse_nwchem +from pyscf.gto.basis.parse_nwchem import _search_basis_block from pyscf import __config__ DISABLE_EVAL = getattr(__config__, 'DISABLE_EVAL', False) MAXL = 8 -def parse(string, optimize=False): +def parse(string, symb=None, optimize=False): '''Parse the basis text which is in CP2K format, return an internal basis format which can be assigned to :attr:`Mole.basis` Lines started with # are ignored. @@ -50,6 +51,12 @@ def parse(string, optimize=False): ... # ... 
""")} ''' + if symb is not None: + raw_data = list(filter(None, re.split(BASIS_SET_DELIMITER, string))) + string = _search_basis_block(raw_data, symb) + if not string: + raise BasisNotFoundError(f'Basis not found for {symb}') + bastxt = [] for dat in string.splitlines(): x = dat.split('#')[0].strip() @@ -115,10 +122,8 @@ def _parse(blines, optimize=False): def search_seg(basisfile, symb): with open(basisfile, 'r') as fin: fdata = re.split(BASIS_SET_DELIMITER, fin.read()) - for dat in fdata[1:]: - dat0 = dat.split(None, 1) - if dat0 and dat0[0] == symb: - # remove blank lines - return [x.strip() for x in dat.splitlines() - if x.strip() and 'END' not in x] - raise BasisNotFoundError(f'Basis for {symb} not found in {basisfile}') + raw_basis = _search_basis_block(fdata[1:], symb) + if not raw_basis: + raise BasisNotFoundError(f'Basis for {symb} not found in {basisfile}') + return [x.strip() for x in raw_basis.splitlines() + if x.strip() and 'END' not in x] diff --git a/pyscf/gto/basis/parse_cp2k_pp.py b/pyscf/gto/basis/parse_cp2k_pp.py index c385ca3b9c..ff64c1a0bc 100644 --- a/pyscf/gto/basis/parse_cp2k_pp.py +++ b/pyscf/gto/basis/parse_cp2k_pp.py @@ -21,10 +21,11 @@ ''' import sys +import re from pyscf.lib.exceptions import BasisNotFoundError import numpy as np -def parse(string): +def parse(string, symb=None): '''Parse the pseudo text *string* which is in CP2K format, return an internal basis format which can be assigned to :attr:`Cell.pseudo` Lines started with # are ignored. @@ -45,8 +46,14 @@ def parse(string): ... 0.28637912 0 ... 
""")} ''' - pseudotxt = [x.strip() for x in string.splitlines() - if x.strip() and 'END' not in x and '#PSEUDOPOTENTIAL' not in x] + if symb is not None: + raw_data = list(filter(None, re.split('#PSEUDOPOTENTIAL', string))) + pseudotxt = _search_gthpp_block(raw_data, symb) + if not pseudotxt: + raise BasisNotFoundError(f'Pseudopotential not found for {symb}.') + else: + pseudotxt = [x.strip() for x in string.splitlines() + if x.strip() and 'END' not in x and '#PSEUDOPOTENTIAL' not in x] return _parse(pseudotxt) def load(pseudofile, symb, suffix=None): @@ -95,7 +102,13 @@ def search_seg(pseudofile, symb, suffix=None): fin = open(pseudofile, 'r') fdata = fin.read().split('#PSEUDOPOTENTIAL') fin.close() - for dat in fdata[1:]: + dat = _search_gthpp_block(fdata[1:], symb, suffix) + if not dat: + raise BasisNotFoundError(f'Pseudopotential for {symb} in {pseudofile}') + return dat + +def _search_gthpp_block(raw_data, symb, suffix=None): + for dat in raw_data: dat0 = dat.split(None, 1) if dat0 and dat0[0] == symb: dat = [x.strip() for x in dat.splitlines() @@ -107,7 +120,7 @@ def search_seg(pseudofile, symb, suffix=None): else: if any(suffix == x.split('-')[-1] for x in dat[0].split()): return dat - raise BasisNotFoundError(f'Pseudopotential for {symb} in {pseudofile}') + return None if __name__ == '__main__': args = sys.argv[1:] diff --git a/pyscf/gto/test/test_basis_parser.py b/pyscf/gto/test/test_basis_parser.py index d886e252e5..db8d7873a4 100644 --- a/pyscf/gto/test/test_basis_parser.py +++ b/pyscf/gto/test/test_basis_parser.py @@ -21,6 +21,7 @@ from pyscf import lib from pyscf.gto.basis import parse_molpro from pyscf.gto.basis import parse_gaussian +from pyscf.gto.basis import parse_cp2k, parse_cp2k_pp from pyscf.lib.exceptions import BasisNotFoundError class KnownValues(unittest.TestCase): @@ -462,6 +463,70 @@ def test_parse_molpro_ecp_soc(self): [3, [[], [], [[2.928812, -11.777154, 7.851436], [2.904069, -15.525522, -7.762761], [0.287352, -0.14855, 0.099033], 
[0.48938, -0.273682, -0.136841]], [], [], [], []]]]] self.assertEqual(ecp_data, ref) + def test_parse_gth_basis(self): + basis_str = ''' + #BASIS SET + C DZV-GTH + 1 + 2 0 1 4 2 2 + 4.3362376436 0.1490797872 0.0000000000 -0.0878123619 0.0000000000 + 1.2881838513 -0.0292640031 0.0000000000 -0.2775560300 0.0000000000 + 0.4037767149 -0.6882040510 0.0000000000 -0.4712295093 0.0000000000 + 0.1187877657 -0.3964426906 1.0000000000 -0.4058039291 1.0000000000 + # + #BASIS SET + N DZV-GTH + 1 + 2 0 1 4 2 2 + 6.1526903413 0.1506300537 0.0000000000 -0.0950603476 0.0000000000 + 1.8236332280 -0.0360100734 0.0000000000 -0.2918864295 0.0000000000 + 0.5676628870 -0.6942023212 0.0000000000 -0.4739050050 0.0000000000 + 0.1628222852 -0.3878929987 1.0000000000 -0.3893418670 1.0000000000 + # + ''' + basis1 = parse_cp2k.parse(basis_str, 'C') + ref = gto.basis.load('gth-dzv', 'C') + self.assertEqual(ref, basis1) + basis1 = parse_cp2k.parse(basis_str, 'N') + ref = gto.basis.load('gth-dzv', 'N') + self.assertEqual(ref, basis1) + + basis_str = ''' + C DZV-GTH + 1 + 2 0 1 4 2 2 + 4.3362376436 0.1490797872 0.0000000000 -0.0878123619 0.0000000000 + 1.2881838513 -0.0292640031 0.0000000000 -0.2775560300 0.0000000000 + 0.4037767149 -0.6882040510 0.0000000000 -0.4712295093 0.0000000000 + 0.1187877657 -0.3964426906 1.0000000000 -0.4058039291 1.0000000000 + # + ''' + basis1 = parse_cp2k.parse(basis_str) + ref = gto.basis.load('gth-dzv', 'C') + self.assertEqual(ref, basis1) + + def test_parse_gth_pp(self): + pp_str = ''' + #PSEUDOPOTENTIAL + B GTH-PADE-q3 GTH-LDA-q3 GTH-PADE GTH-LDA + 2 1 + 0.43392956 2 -5.57864173 0.80425145 + 2 + 0.37384326 1 6.23392822 + 0.36039317 0 + #PSEUDOPOTENTIAL + C GTH-PADE-q4 GTH-LDA-q4 GTH-PADE GTH-LDA + 2 2 + 0.34883045 2 -8.51377110 1.22843203 + 2 + 0.30455321 1 9.52284179 + 0.23267730 0''' + pp1 = parse_cp2k_pp.parse(pp_str, 'B') + ref = gto.basis.load_pseudo('gth-pade', 'B') + self.assertEqual(ref, pp1) + pp1 = parse_cp2k_pp.parse(pp_str, 'C') + ref = 
gto.basis.load_pseudo('gth-pade', 'C') + self.assertEqual(ref, pp1) if __name__ == "__main__": print("test basis module") From 56adbedfb98b923dbecfad7be0c62a43d01f90ae Mon Sep 17 00:00:00 2001 From: matthew-hennefarth Date: Mon, 5 Feb 2024 13:00:16 -0600 Subject: [PATCH 09/44] SA-CASSCF Nonadiabatic Coupling Vectors (#2046) * add nacs stuff but need to rebuild * fix io warning test * add sacasscf nac files from mrh * update and add nac_method * fix for flake8 * add example file * update to assert len and uniqueness of states * udpate * udpate * udpate * fix assert statements and handling of state[0]==state[1] --- examples/nac/01-sacasscf_nac.py | 90 ++++++++++ pyscf/mcscf/addons.py | 8 + pyscf/mcscf/mc1step.py | 4 + pyscf/nac/__init__.py | 32 ++++ pyscf/nac/sacasscf.py | 293 ++++++++++++++++++++++++++++++++ pyscf/nac/test/test_sacasscf.py | 175 +++++++++++++++++++ 6 files changed, 602 insertions(+) create mode 100644 examples/nac/01-sacasscf_nac.py create mode 100644 pyscf/nac/__init__.py create mode 100644 pyscf/nac/sacasscf.py create mode 100644 pyscf/nac/test/test_sacasscf.py diff --git a/examples/nac/01-sacasscf_nac.py b/examples/nac/01-sacasscf_nac.py new file mode 100644 index 0000000000..5384498ad1 --- /dev/null +++ b/examples/nac/01-sacasscf_nac.py @@ -0,0 +1,90 @@ +from pyscf import gto, scf, mcscf, lib + +# NAC signs are really, really hard to nail down. +# There are arbitrary signs associated with +# 1. The MO coefficients +# 2. The CI vectors +# 3. Almost any kind of post-processing (natural-orbital analysis, etc.) +# 4. Developer convention on whether the bra index or ket index is 1st +# It MIGHT help comparison to OpenMolcas if you load a rasscf.h5 file +# I TRIED to choose the same convention for #4 as OpenMolcas. 
+mol = gto.M (atom='Li 0 0 0;H 1.5 0 0', basis='sto-3g', + output='LiH_sa2casscf22_sto3g.log', verbose=lib.logger.INFO) + +mf = scf.RHF (mol).run () +mc = mcscf.CASSCF (mf, 2, 2) +mc.fix_spin_(ss=0, shift=1) +mc = mc.state_average ([0.5,0.5]).run (conv_tol=1e-10) + +mc_nacs = mc.nac_method() + +# 1. <1|d0/dR> +# Equivalent OpenMolcas input: +# ``` +# &ALASKA +# NAC=1 2 +# ``` +nac = mc_nacs.kernel (state=(0,1)) +print ("\nNAC <1|d0/dR>:\n", nac) +print ("Notice that according to the NACs printed above, rigidly moving the") +print ("molecule along the bond axis changes the electronic wave function, which") +print ("is obviously unphysical. This broken translational symmetry is due to the") +print ("'CSF contribution'. Omitting the CSF contribution corresponds to using the") +print ("'electron-translation factors' of Fatehi and Subotnik and is requested by") +print ("passing 'use_etfs=True'.") + +# 2. <1|d0/dR> w/ ETFs (i.e., w/out CSF contribution) +# Equivalent OpenMolcas input: +# ``` +# &ALASKA +# NAC=1 2 +# NOCSF +# ``` +nac = mc_nacs.kernel (state=(0,1), use_etfs=True) +print ("\nNAC <1|d0/dR> w/ ETFs:\n", nac) +print ("These NACs are much more well-behaved: moving the molecule rigidly around") +print ("in space doesn't induce any change to the electronic wave function.") + +# 3. <0|d1/dR> +# Equivalent OpenMolcas input: +# ``` +# &ALASKA +# NAC=2 1 +# ``` +nac = mc_nacs.kernel (state=(1,0)) +print ("\nThe NACs are antisymmetric with respect to state transposition.") +print ("NAC <0|d1/dR>:\n", nac) + +# 4. <0|d1/dR> w/ ETFs +# Equivalent OpenMolcas input: +# ``` +# &ALASKA +# NAC=2 1 +# NOCSF +# ``` +nac = mc_nacs.kernel (state=(1,0), use_etfs=True) +print ("NAC <0|d1/dR> w/ ETFs:\n", nac) + +# 5. <1|d0/dR>*(E1-E0) = <0|d1/dR>*(E0-E1) +# I'm not aware of any OpenMolcas equivalent for this, but all the information +# should obviously be in the output file, as long as you aren't right at a CI. 
+nac_01 = mc_nacs.kernel (state=(0,1), mult_ediff=True) +nac_10 = mc_nacs.kernel (state=(1,0), mult_ediff=True) +print ("\nNACs diverge at conical intersections (CI). The important question") +print ("is how quickly it diverges. You can get at this by calculating NACs") +print ("multiplied by the energy difference using the keyword 'mult_ediff=True'.") +print ("This yields a quantity which is symmetric wrt state interchange and is") +print ("finite at a CI.") +print ("NAC <1|d0/dR>*(E1-E0):\n", nac_01) +print ("NAC <0|d1/dR>*(E0-E1):\n", nac_10) + +# 6. <1|d0/dR>*(E1-E0) w/ETFs = <0|d1/dR>*(E0-E1) w/ETFs = <0|dH/dR|1> +# This is the quantity one uses to optimize MECIs +v01 = mc_nacs.kernel (state=(0,1), use_etfs=True, mult_ediff=True) +v10 = mc_nacs.kernel (state=(1,0), use_etfs=True, mult_ediff=True) +print ("\nUsing both 'use_etfs=True' and 'mult_ediff=True' corresponds to the") +print ("derivative of the off-diagonal element of the potential matrix. This") +print ("tells you one of the two components of the branching plane at the CI.") +print ("<1|d0/dR>*(E1-E0) w/ ETFs = <1|dH/dR|0>:\n", v01) +print ("<0|d1/dR>*(E0-E1) w/ ETFs = <0|dH/dR|1>:\n", v10) + diff --git a/pyscf/mcscf/addons.py b/pyscf/mcscf/addons.py index 0e7cca66fe..57b1db9e5f 100644 --- a/pyscf/mcscf/addons.py +++ b/pyscf/mcscf/addons.py @@ -1104,6 +1104,14 @@ def nuc_grad_method (self, state=None): Gradients = nuc_grad_method + def nac_method(self): + if callable(getattr(self, '_state_average_nac_method', None)): + return self._state_average_nac_method() + else: + raise NotImplementedError("NAC method") + + NACs = nac_method + def state_average_(casscf, weights=(0.5,0.5), wfnsym=None): ''' Inplace version of state_average ''' sacasscf = state_average (casscf, weights, wfnsym) diff --git a/pyscf/mcscf/mc1step.py b/pyscf/mcscf/mc1step.py index 35249ce9a5..020d6e6d88 100644 --- a/pyscf/mcscf/mc1step.py +++ b/pyscf/mcscf/mc1step.py @@ -1275,6 +1275,10 @@ def _state_average_nuc_grad_method (self, 
state=None): from pyscf.grad import sacasscf as sacasscf_grad return sacasscf_grad.Gradients (self, state=state) + def _state_average_nac_method(self): + from pyscf.nac import sacasscf as sacasscf_nac + return sacasscf_nac.NonAdiabaticCouplings(self) + def newton(self): from pyscf.mcscf import newton_casscf from pyscf.mcscf.addons import StateAverageMCSCFSolver diff --git a/pyscf/nac/__init__.py b/pyscf/nac/__init__.py new file mode 100644 index 0000000000..897245c989 --- /dev/null +++ b/pyscf/nac/__init__.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Analytical Nonadiabatic Coupling Vectors +============================ + +Simple usage:: + + >>> from pyscf import gto, scf, mcscf, nac + >>> mol = gto.M(atom='N 0 0 0; N 0 0 1', basis='ccpvdz') + >>> mf = scf.RHF(mol).run() + >>> mc = mcscf.CASSCF(mf, 2, 2).state_average([0.5, 0.5]).run() + >>> mc_nac = nac.sacasscf.NonAdiabaticCouplings(mc) + >>> mc_nac = mc.nac_method() # Also valid + >>> mc_nac.kernel(state=(0,1), use_etfs=False) +""" + +from . 
import sacasscf diff --git a/pyscf/nac/sacasscf.py b/pyscf/nac/sacasscf.py new file mode 100644 index 0000000000..fe84e034ff --- /dev/null +++ b/pyscf/nac/sacasscf.py @@ -0,0 +1,293 @@ +import numpy as np +from pyscf import lib +from pyscf.lib import logger +from pyscf.fci import direct_spin1 +from pyscf.mcscf import newton_casscf +from pyscf.grad import casscf as casscf_grad +from pyscf.grad import sacasscf as sacasscf_grad +from functools import reduce + +# The extension from gradients -> NACs has three basic steps: +# 0. ("state" index integer -> tuple) +# 1. fcisolver.make_rdm12 -> fcisolver.trans_rdm12 +# 2. remove core-orbital and nuclear contributions to everything +# 3. option to include the "csf contribution" +# Additional good ideas: +# a. Option to multiply NACs by the energy difference to control +# singularities + +def _unpack_state(state): + assert len(state) == 2, "derivative couplings are defined between 2 states" + return state[0], state[1] + + +def grad_elec_core(mc_grad, mo_coeff=None, atmlst=None, eris=None, mf_grad=None): + """Compute the core-electron part of the CASSCF (Hellmann-Feynman) + gradient using a modified RHF grad_elec call.""" + mc = mc_grad.base + if mo_coeff is None: mo_coeff = mc.mo_coeff + if eris is None: eris = mc.ao2mo (mo_coeff) + if mf_grad is None: mf_grad = mc._scf.nuc_grad_method () + ncore = mc.ncore + moH = mo_coeff.conj ().T + f0 = (moH @ mc.get_hcore () @ mo_coeff) + eris.vhf_c + mo_energy = f0.diagonal ().copy () + mo_occ = np.zeros_like (mo_energy) + mo_occ[:ncore] = 2.0 + f0 *= mo_occ[None,:] + dme0 = lambda * args: mo_coeff @ ((f0+f0.T)*.5) @ moH + with lib.temporary_env (mf_grad, make_rdm1e=dme0, verbose=0): + with lib.temporary_env (mf_grad.base, mo_coeff=mo_coeff, mo_occ=mo_occ): + # Second level there should become unnecessary in future, if anyone + # ever gets around to cleaning up pyscf.df.grad.rhf & pyscf.grad.rhf + de = mf_grad.grad_elec (mo_coeff=mo_coeff, mo_energy=mo_energy, + mo_occ=mo_occ, 
atmlst=atmlst) + return de + +def grad_elec_active (mc_grad, mo_coeff=None, ci=None, atmlst=None, + eris=None, mf_grad=None, verbose=None): + '''Compute the active-electron part of the CASSCF (Hellmann-Feynman) + gradient by subtracting the core-electron part.''' + t0 = (logger.process_clock (), logger.perf_counter ()) + mc = mc_grad.base + log = logger.new_logger (mc_grad, verbose) + if mf_grad is None: mf_grad=mc._scf.nuc_grad_method () + de = mc_grad.grad_elec (mo_coeff=mo_coeff, ci=ci, atmlst=atmlst, + verbose=0) + de -= grad_elec_core (mc_grad, mo_coeff=mo_coeff, atmlst=atmlst, + eris=eris, mf_grad=mf_grad) + log.debug ('CASSCF active-orbital gradient:\n{}'.format (de)) + log.timer ('CASSCF active-orbital gradient', *t0) + return de + +def gen_g_hop_active (mc, mo, ci0, eris, verbose=None): + '''Compute the active-electron part of the orbital rotation gradient + by patching out the appropriate block of eris.vhf_c''' + moH = mo.conj ().T + ncore = mc.ncore + vnocore = eris.vhf_c.copy () + vnocore[:,:ncore] = -moH @ mc.get_hcore () @ mo[:,:ncore] + with lib.temporary_env (eris, vhf_c=vnocore): + return newton_casscf.gen_g_hop (mc, mo, ci0, eris, verbose=verbose) + +def _nac_csf (mol, mf_grad, tm1, atmlst): + if atmlst is None: atmlst = list (range (mol.natm)) + aoslices = mol.aoslice_by_atom () + s1 = mf_grad.get_ovlp (mol) + # if libcint documentation is to be trusted, mf_grad.get_ovlp + # corresponds to differentiating on the SECOND index: + nac = np.zeros ((len(atmlst), 3)) + for k, ia in enumerate (atmlst): + shl0, shl1, p0, p1 = aoslices[ia] + nac[k] += 0.5*np.einsum ('xij,ij->x', s1[:,p0:p1], tm1[p0:p1]) + return nac + +def nac_csf (mc_grad, mo_coeff=None, ci=None, state=None, mf_grad=None, + atmlst=None): + '''Compute the "CSF contribution" to the SA-CASSCF NAC''' + mc = mc_grad.base + if mo_coeff is None: mo_coeff = mc.mo_coeff + if ci is None: ci = mc.ci + if state is None: state = mc_grad.state + if mf_grad is None: mf_grad = mc._scf.nuc_grad_method () 
+ if atmlst is None: atmlst = mc_grad.atmlst + mol = mc.mol + ket, bra = _unpack_state (state) + ncore, ncas, nelecas = mc.ncore, mc.ncas, mc.nelecas + castm1 = direct_spin1.trans_rdm1 (ci[bra], ci[ket], ncas, nelecas) + # if PySCF commentary is to be trusted, trans_rdm1[p,q] is + # <bra|q'p|ket>. I want <bra|p'q - q'p|ket>. + castm1 = castm1.conj ().T - castm1 + mo_cas = mo_coeff[:,ncore:][:,:ncas] + tm1 = reduce (np.dot, (mo_cas, castm1, mo_cas.conj ().T)) + return _nac_csf (mol, mf_grad, tm1, atmlst) + +class NonAdiabaticCouplings (sacasscf_grad.Gradients): + '''SA-CASSCF non-adiabatic couplings (NACs) between states + + kwargs/attributes: + + state : tuple of length 2 + The NACs returned are <bra|d(ket)/dR>. + In other words, state = (ket, bra). + mult_ediff : logical + If True, returns NACs multiplied by the energy difference. + Useful near conical intersections to avoid numerical problems. + use_etfs : logical + If True, use the ``electron translation factors'' of Fatehi and + Subotnik [JPCL 3, 2039 (2012)], which guarantee conservation of + total electron + nuclear momentum when the nuclei are moving + (i.e., in non-adiabatic molecular dynamics). This corresponds + to omitting the so-called ``CSF contribution'' [cf. JCTC 12, + 3636 (2016)]. 
+ ''' + + def __init__(self, mc, state=None, mult_ediff=False, use_etfs=False): + self.mult_ediff = mult_ediff + self.use_etfs = use_etfs + if state is not None: + assert len(state) == 2, "derivative couplings are defined between 2 states" + sacasscf_grad.Gradients.__init__(self, mc, state=state) + + def make_fcasscf_nacs (self, state=None, casscf_attr=None, + fcisolver_attr=None): + if state is None: state = self.state + if casscf_attr is None: casscf_attr = {} + if fcisolver_attr is None: fcisolver_attr = {} + ket, bra = _unpack_state (state) + ci, ncas, nelecas = self.base.ci, self.base.ncas, self.base.nelecas + # TODO: use fcisolver.fcisolvers in state-average mix case for this + castm1, castm2 = direct_spin1.trans_rdm12 (ci[bra], ci[ket], ncas, + nelecas) + castm1 = 0.5 * (castm1 + castm1.T) + castm2 = 0.5 * (castm2 + castm2.transpose (1,0,3,2)) + fcisolver_attr['make_rdm12'] = lambda *args, **kwargs : (castm1, castm2) + fcisolver_attr['make_rdm1'] = lambda *args, **kwargs : castm1 + fcisolver_attr['make_rdm2'] = lambda *args, **kwargs : castm2 + return sacasscf_grad.Gradients.make_fcasscf (self, + state=ket, casscf_attr=casscf_attr, fcisolver_attr=fcisolver_attr) + + + def get_wfn_response (self, atmlst=None, state=None, verbose=None, mo=None, ci=None, **kwargs): + if state is None: state = self.state + if atmlst is None: atmlst = self.atmlst + if verbose is None: verbose = self.verbose + if mo is None: mo = self.base.mo_coeff + if ci is None: ci = self.base.ci + log = logger.new_logger (self, verbose) + ket, bra = _unpack_state (state) + fcasscf = self.make_fcasscf_nacs (state) + fcasscf.mo_coeff = mo + fcasscf.ci = ci[ket] + eris = fcasscf.ao2mo (mo) + g_all_ket = gen_g_hop_active (fcasscf, mo, ci[ket], eris, verbose)[0] + g_all = np.zeros (self.nlag) + g_all[:self.ngorb] = g_all_ket[:self.ngorb] + # The fun thing about the ci sector is that you swap them (&/2): + # = = /2 + # (It should be zero for converged SA-CASSCF anyway, though) + g_ci_bra = 0.5 * 
g_all_ket[self.ngorb:] + g_all_bra = gen_g_hop_active (fcasscf, mo, ci[bra], eris, verbose)[0] + g_ci_ket = 0.5 * g_all_bra[self.ngorb:] + # I have to make sure they don't talk to each other because the + # preconditioner doesn't explore that space at all. Should I + # instead solve at the init_guess step, like in MC-PDFT? + # In practice it should all be zeros but how tightly does + # everything have to be converged? + ndet_ket = (self.na_states[ket], self.nb_states[ket]) + ndet_bra = (self.na_states[bra], self.nb_states[bra]) + if ndet_ket==ndet_bra: + ket2bra = np.dot (ci[bra].conj ().ravel (), g_ci_ket) + bra2ket = np.dot (ci[ket].conj ().ravel (), g_ci_bra) + log.debug ('SA-CASSCF , check: %5.3g , %5.3g', + ket2bra, bra2ket) + g_ci_ket -= ket2bra * ci[bra].ravel () + g_ci_bra -= bra2ket * ci[ket].ravel () + ndet_ket = ndet_ket[0]*ndet_ket[1] + ndet_bra = ndet_bra[0]*ndet_bra[1] + # No need to reshape or anything, just use the magic of repeated slicing + offs_ket = (sum ([na * nb for na, nb in zip( + self.na_states[:ket], self.nb_states[:ket])]) + if ket > 0 else 0) + offs_bra = (sum ([na * nb for na, nb in zip( + self.na_states[:bra], self.nb_states[:bra])]) + if bra > 0 else 0) + g_all[self.ngorb:][offs_ket:][:ndet_ket] = g_ci_ket + g_all[self.ngorb:][offs_bra:][:ndet_bra] = g_ci_bra + return g_all + + + def get_ham_response (self, state=None, atmlst=None, verbose=None, mo=None, + ci=None, eris=None, mf_grad=None, **kwargs): + if state is None: state = self.state + if atmlst is None: atmlst = self.atmlst + if verbose is None: verbose = self.verbose + if mo is None: mo = self.base.mo_coeff + if ci is None: ci = self.base.ci + if mf_grad is None: mf_grad = self.base._scf.nuc_grad_method () + if eris is None and self.eris is None: + eris = self.eris = self.base.ao2mo (mo) + elif eris is None: + eris = self.eris + use_etfs = kwargs.get ('use_etfs', self.use_etfs) + ket, bra = _unpack_state (state) + fcasscf_grad = casscf_grad.Gradients (self.make_fcasscf_nacs 
(state)) + nac = grad_elec_active (fcasscf_grad, mo_coeff=mo, ci=ci[ket], + eris=eris, atmlst=atmlst, verbose=verbose) + if not use_etfs: nac += self.nac_csf ( + mo_coeff=mo, ci=ci, state=state, mf_grad=mf_grad, atmlst=atmlst) + return nac + + def nac_csf (self, mo_coeff=None, ci=None, state=None, mf_grad=None, atmlst=None): + if state is None: state = self.state + if atmlst is None: atmlst = self.atmlst + if mo_coeff is None: mo_coeff = self.base.mo_coeff + if ci is None: ci = self.base.ci + if mf_grad is None: mf_grad = self.base._scf.nuc_grad_method () + nac = nac_csf (self, mo_coeff=mo_coeff, ci=ci, state=state, + mf_grad=mf_grad, atmlst=atmlst) + ket, bra = _unpack_state (state) + e_bra = self.base.e_states[bra] + e_ket = self.base.e_states[ket] + nac *= e_bra - e_ket + return nac + + def kernel (self, *args, **kwargs): + mult_ediff = kwargs.get ('mult_ediff', self.mult_ediff) + state = kwargs.get ('state', self.state) + assert len(state) == 2, "derivative couplings are defined between 2 states" + if state[0] == state[1]: + mol = kwargs.get('mol', self.mol) + atmlst = kwargs.get('atmlst', range(mol.natm)) + return np.zeros((len(atmlst), 3)) + + nac = sacasscf_grad.Gradients.kernel (self, *args, **kwargs) + if not mult_ediff: + ket, bra = _unpack_state (state) + e_bra = self.base.e_states[bra] + e_ket = self.base.e_states[ket] + nac /= e_bra - e_ket + return nac + +if __name__=='__main__': + from pyscf import gto, scf, mcscf + from scipy import linalg + mol = gto.M (atom = 'Li 0 0 0; H 0 0 1.5', basis='sto-3g', + output='sacasscf_nacs.log', verbose=lib.logger.INFO) + mf = scf.RHF (mol).run () + mc = mcscf.CASSCF (mf, 2, 2).fix_spin_(ss=0).state_average ([0.5,0.5]).run (conv_tol=1e-10) + openmolcas_energies = np.array ([-7.85629118, -7.72175252]) + print ("energies:",mc.e_states) + print ("disagreement w openmolcas:", np.around (mc.e_states-openmolcas_energies, 8)) + mc_nacs = NonAdiabaticCouplings (mc) + print ("no csf contr") + nac_01 = mc_nacs.kernel 
(state=(0,1), use_etfs=True) + nac_10 = mc_nacs.kernel (state=(1,0), use_etfs=True) + nac_01_mult = mc_nacs.kernel (state=(0,1), use_etfs=True, mult_ediff=True) + nac_10_mult = mc_nacs.kernel (state=(1,0), use_etfs=True, mult_ediff=True) + print ("antisym") + print (nac_01) + print ("checking antisym:",linalg.norm(nac_01+nac_10)) + print ("sym") + print (nac_01_mult) + print ("checking sym:",linalg.norm(nac_01_mult-nac_10_mult)) + + + print ("incl csf contr") + nac_01 = mc_nacs.kernel (state=(0,1), use_etfs=False) + nac_10 = mc_nacs.kernel (state=(1,0), use_etfs=False) + nac_01_mult = mc_nacs.kernel (state=(0,1), use_etfs=False, mult_ediff=True) + nac_10_mult = mc_nacs.kernel (state=(1,0), use_etfs=False, mult_ediff=True) + print ("antisym") + print (nac_01) + print ("checking antisym:",linalg.norm(nac_01+nac_10)) + print ("sym") + print (nac_01_mult) + print ("checking sym:",linalg.norm(nac_01_mult-nac_10_mult)) + + print ("Check gradients") + mc_grad = mc.nuc_grad_method () + de_0 = mc_grad.kernel (state=0) + print (de_0) + de_1 = mc_grad.kernel (state=1) + print (de_1) diff --git a/pyscf/nac/test/test_sacasscf.py b/pyscf/nac/test/test_sacasscf.py new file mode 100644 index 0000000000..5a33c966d0 --- /dev/null +++ b/pyscf/nac/test/test_sacasscf.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +# Copyright 2014-2022 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import numpy as np +from pyscf import gto, scf, mcscf +from pyscf.nac.sacasscf import NonAdiabaticCouplings +import unittest + + +def diatomic(atom1, atom2, r, basis, ncas, nelecas, nstates, + charge=None, spin=None, symmetry=False, cas_irrep=None): + global mols + xyz = '{:s} 0.0 0.0 0.0; {:s} {:.3f} 0.0 0.0'.format(atom1, atom2, r) + mol = gto.M(atom=xyz, basis=basis, charge=charge, spin=spin, + symmetry=symmetry, verbose=0, output='/dev/null') + + mols.append(mol) + + mf = scf.RHF(mol) + + mc = mcscf.CASSCF(mf.run(), ncas, nelecas).set(natorb=True) + + if spin is not None: + s = spin*0.5 + + else: + s = (mol.nelectron % 2)*0.5 + + mc.fix_spin_(ss=s*(s+1), shift=1) + mc = mc.state_average([1.0/float(nstates), ]*nstates) + mc.conv_tol = mc.conv_tol_diabatize = 1e-12 + mo = None + + if symmetry and (cas_irrep is not None): + mo = mc.sort_mo_by_irrep(cas_irrep) + + mc.kernel(mo) + + return mc.nac_method() + +def setUpModule(): + global mols + mols = [] + +def tearDownModule(): + global mols, diatomic + [m.stdout.close() for m in mols] + del mols, diatomic + + +class KnownValues(unittest.TestCase): + + def test_nac_h2_sa2casscf22_sto3g(self): + # z_orb: no + # z_ci: yes + # z_is: no + mc_grad = diatomic('H', 'H', 1.3, 'STO-3G', 2, 2, 2) + + # OpenMolcas v23.02 - PC + de_ref = np.array([[2.24611972496341E-01, 2.24611972496341E-01], + [3.91518173397213E-18, -3.91518173397213E-18]]) + for i in range(2): + with self.subTest(use_etfs=bool(i)): + de = mc_grad.kernel(state=(0, 1), use_etfs=bool(i))[:, 0] + de *= np.sign(de[0]) * np.sign(de_ref[i, 0]) + # TODO: somehow confirm sign convention + self.assertAlmostEqual(de[0], de_ref[i, 0], 5) + self.assertAlmostEqual(de[1], de_ref[i, 1], 5) + + + def test_nac_h2_sa3casscf22_sto3g(self): + # z_orb: no + # z_ci: no + # z_is: no + mc_grad = diatomic('H', 'H', 1.3, 'STO-3G', 2, 2, 3) + + # OpenMolcas v23.02 - PC + de_ref = np.array([[2.24611972496341E-01,2.24611972496341E-01 ], + [3.91518173397213E-18, -3.91518173397213E-18]]) 
+ for i in range(2): + with self.subTest(use_etfs=bool(i)): + de = mc_grad.kernel(state=(0, 1), use_etfs=bool(i))[:, 0] + de *= np.sign(de[0]) * np.sign(de_ref[i, 0]) + # TODO: somehow confirm sign convention + self.assertAlmostEqual(de[0], de_ref[i, 0], 5) + self.assertAlmostEqual(de[1], de_ref[i, 1], 5) + + def test_nac_h2_sa2caasf22_631g(self): + # z_orb: yes + # z_ci: yes + # z_is: no + mc_grad = diatomic('H', 'H', 1.3, '6-31G', 2, 2, 2) + + # OpenMolcas v23.02 - PC + de_ref = np.array([[2.63335709207419E-01,2.63335709207420E-01], + [-4.13635186565710E-16,4.47060252146777E-16 ]]) + + for i in range(2): + with self.subTest(use_etfs=bool(i)): + de = mc_grad.kernel(state=(0, 1), use_etfs=bool(i))[:, 0] + de *= np.sign(de[0]) * np.sign(de_ref[i, 0]) + # TODO: somehow confirm sign convention + self.assertAlmostEqual(de[0], de_ref[i, 0], 5) + self.assertAlmostEqual(de[1], de_ref[i, 1], 5) + + + def test_nac_h2_sa3casscf22_631g(self): + # z_orb: yes + # z_ci: no + # z_is: no + mc_grad = diatomic('H', 'H', 1.3, '6-31G', 2, 2, 3) + + # OpenMolcas v23.02 - PC + de_ref = np.array([[-2.61263051047980E-01,-2.61263051047980E-01], + [-5.77124316768522E-17,2.47338992900795E-17 ]]) + + for i in range(2): + with self.subTest(use_etfs=bool(i)): + de = mc_grad.kernel(state=(0, 1), use_etfs=bool(i))[:, 0] + de *= np.sign(de[0]) * np.sign(de_ref[i, 0]) + # TODO: somehow confirm sign convention + self.assertAlmostEqual(de[0], de_ref[i, 0], 5) + self.assertAlmostEqual(de[1], de_ref[i, 1], 5) + + def test_nac_lih_sa2casscf22_sto3g(self): + # z_orb: yes + # z_ci: yes + # z_is: yes + mc_grad = diatomic('Li', 'H', 1.5, 'STO-3G', 2, 2, 2) + + # OpenMolcas v23.02 - PC + de_ref = np.array([[1.83701729060390E-01, -6.91462064586138E-02], + [9.14842536971979E-02, -9.14842536971979E-02]]) + for i in range(2): + with self.subTest(use_etfs=bool(i)): + de = mc_grad.kernel(state=(0, 1), use_etfs=bool(i))[:, 0] + de *= np.sign(de[0]) * np.sign(de_ref[i, 0]) + # TODO: somehow confirm sign convention + 
self.assertAlmostEqual(de[0], de_ref[i, 0], 5) + self.assertAlmostEqual(de[1], de_ref[i, 1], 5) + + def test_nac_lih_cms3ftlda22_sto3g(self): + # z_orb: yes + # z_ci: no + # z_is: yes + mc_grad = diatomic('Li', 'H', 2.5, 'STO-3G', 2, 2, 3) + + # OpenMolcas v23.02 - PC + de_ref = np.array([[2.68015835251472E-01, -6.48474666167559E-02], + [1.24870721811750E-01, -1.24870721811750E-01]]) + + for i in range(2): + with self.subTest(use_etfs=bool(i)): + de = mc_grad.kernel(state=(0, 1), use_etfs=bool(i))[:, 0] + de *= np.sign(de[0]) * np.sign(de_ref[i, 0]) + # TODO: somehow confirm sign convention + self.assertAlmostEqual(de[0], de_ref[i, 0], 5) + self.assertAlmostEqual(de[1], de_ref[i, 1], 5) + + +if __name__ == "__main__": + print("Full Tests for SA-CASSCF non-adiabatic couplings of diatomic molecules") + unittest.main() From 83c62be47c622c1ee7bf2b2d63eab126c0090145 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Thu, 1 Feb 2024 11:26:46 -0800 Subject: [PATCH 10/44] Backward compatibility issue in fci.cistring --- pyscf/fci/cistring.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyscf/fci/cistring.py b/pyscf/fci/cistring.py index 0ef463d693..4de0ed169a 100644 --- a/pyscf/fci/cistring.py +++ b/pyscf/fci/cistring.py @@ -107,6 +107,8 @@ def gen_occs_iter(orb_list, nelec): return res occslst = gen_occs_iter(orb_list, nelec) return numpy.asarray(occslst, dtype=numpy.int32).view(OIndexList) +# Add this symbol for backward compatibility. Should remove in the future. 
+_gen_occslst = gen_occslst def _strs2occslst(strs, norb): na = len(strs) From 6f8ce439fbe0c26ec4d226e2225b28c7f668f922 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Sat, 3 Feb 2024 14:46:12 -0800 Subject: [PATCH 11/44] Release 2.5 --- CHANGELOG | 29 +++++++++++++++++++++++++++++ NOTICE | 5 ++++- README.md | 4 ++-- pyscf/__init__.py | 2 +- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 0131615df5..b199af5208 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,31 @@ +PySCF 2.5.0 (2024-02-03) +------------------------ +* Added + - SA-CASSCF Nonadiabatic Coupling Vectors + - The to_gpu function to convert pyscf objects to gpu4pyscf objects. + - 4th, and 5th order XC derivatives. +* Improved + - DIIS with damping for gapless systems. + - CPHF solver with level shift for gapless systems. + - The memory footprint for rsdf_builder and rsjk_builder. + - Use UHF natural orbital for RHF chkfile initial guess. + - Pipek Mezey + Stability check using Jacobi sweep. + - The conversion between FCI strings and addresses for more than 64 orbitals. + - The interface to the dftd3 and dftd4 dispersion correction modules. + - Switch off the sparsity treatment in DFT numerical integration for small system. + - Lattice-sum cutoff for non-orthogonal cell. + - Allow turning off AO symmetry for PBC DFT. +* Fixes + - cp2k basis parsers + - k2gamma for dft classes. + - Mole.magmom attribute serialization error. + - post-hf Gradients with Cartesian GTOs. + - Basis order problem in molden.load . + - PBC DFT Becke grids rounding error. + - PBC rsdf for un-sorted basis. + - The get_bands function with k-point symmetry. + + PySCF 2.4.0 (2023-10-16) ------------------------ * Added @@ -37,6 +65,7 @@ PySCF 2.4.0 (2023-10-16) - Assume 46 and 78 core configurations to be f-in-valence. - Coding styles and deprecated warnings from numpy. 
+ PySCF 2.3.0 (2023-07-04) ------------------------ * Added diff --git a/NOTICE b/NOTICE index a21469aea7..dc52a6294c 100644 --- a/NOTICE +++ b/NOTICE @@ -102,7 +102,10 @@ Xiaojie Wu Pavel Pokhilko Frédéric Chapoton Daniel King - +Jiachen Li +Felipe S. S. Schneider +Aniruddha Seal +Peter Reinholdt --- diff --git a/README.md b/README.md index a0b2a39bf4..9a90da41cc 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ Python-based Simulations of Chemistry Framework [![Build Status](https://github.com/pyscf/pyscf/workflows/CI/badge.svg)](https://github.com/pyscf/pyscf/actions?query=workflow%3ACI) [![codecov](https://codecov.io/gh/pyscf/pyscf/branch/master/graph/badge.svg)](https://codecov.io/gh/pyscf/pyscf) -2023-10-15 +2024-02-03 -* [Stable release 2.4.0](https://github.com/pyscf/pyscf/releases/tag/v2.4.0) +* [Stable release 2.5.0](https://github.com/pyscf/pyscf/releases/tag/v2.5.0) * [Changelog](../master/CHANGELOG) * [Documentation](http://www.pyscf.org) * [Installation](#installation) diff --git a/pyscf/__init__.py b/pyscf/__init__.py index 3e32baeb00..c775395d8b 100644 --- a/pyscf/__init__.py +++ b/pyscf/__init__.py @@ -35,7 +35,7 @@ ''' -__version__ = '2.4.0' +__version__ = '2.5.0' import os import sys From e1dedd25c7787433b8988e4b7ab11a4a23f38bc3 Mon Sep 17 00:00:00 2001 From: Zhihao Cui Date: Sat, 10 Feb 2024 15:46:06 -0500 Subject: [PATCH 12/44] fix magmom check in mol.copy() If magmom is an np.ndarray, `if mol.magmom` will raise error. 
Need to explicitly use `is not None` --- pyscf/gto/mole.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscf/gto/mole.py b/pyscf/gto/mole.py index ab1e32c61c..b79074aa56 100644 --- a/pyscf/gto/mole.py +++ b/pyscf/gto/mole.py @@ -1209,7 +1209,7 @@ def copy(mol, deep=True): newmol._ecp = copy.deepcopy(mol._ecp) newmol.pseudo = copy.deepcopy(mol.pseudo) newmol._pseudo = copy.deepcopy(mol._pseudo) - if mol.magmom: + if mol.magmom is not None: newmol.magmom = list(mol.magmom) return newmol From c1898ba0cb5ef12f5519ad68377b3bfdb69d3f9d Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Thu, 15 Feb 2024 20:53:06 -0800 Subject: [PATCH 13/44] Move LebedevGrid C code to Python code; fix issue #2073 (#2076) * Move LebedevGrid C code to Python code; fix issue #2073 * Restore CxLebedevGrid.c and fix bug * lint error --- pyscf/dft/LebedevGrid.py | 5047 ++++++++++++++++++++++++++++++++++++++ pyscf/dft/gen_grid.py | 43 +- pyscf/solvent/ddcosmo.py | 4 +- pyscf/solvent/pcm.py | 4 +- 4 files changed, 5051 insertions(+), 47 deletions(-) create mode 100644 pyscf/dft/LebedevGrid.py diff --git a/pyscf/dft/LebedevGrid.py b/pyscf/dft/LebedevGrid.py new file mode 100644 index 0000000000..0fdfd69f45 --- /dev/null +++ b/pyscf/dft/LebedevGrid.py @@ -0,0 +1,5047 @@ +# This code was modified from CxLebedevGrid.cpp (from Gerald Knizia). +# The following comments are copied from the header file CxLebedevGrid.h +# +# +#ccgk: This code generates Lebedev grids. It is based on C files from +#ccgk: Dmitri Laikov, which were converted to Fortran by Christoph van Wuellen. +#ccgk: I (Gerald Knizia) subsequently converted them back to C++. 
+#ccgk: +#ccgk: The original distribution contained the following readme file: +#ccgk: +# +# Lebedev grids of orders n=6m+5 where m=0,1,...,21 in 16 digit precision +# ======================================================================= +# +# The file Lebedev-Laikov.F implements a set of subroutines providing +# Lebedev-Laikov grids of order n=2m+1, where m=1,2,...,15, and additionally +# grids of order n=6m+5, where m=5,6,...,21. The parameters ensure +# that angular integration of polynomials x**k * y**l * z**m, where k+l+m <= 131 +# can be performed with a relative accuracy of 2e-14 [1]. Note that the weights +# are normalised to add up to 1.0. +# +# For each order n a separate subroutine is provided named +# LD. The parameters X, Y, Z are arrays for the +# cartesian components of each point, and the parameter W is an array for the +# weights. The subroutines increase the integer parameter N by number of grid +# points generated. All these routines use the subroutine gen_oh which takes care +# of the octahedral symmetry of the grids. +# +# Christoph van Wuellen (Ruhr-Universitaet, Bochum, Germany) generated the +# routines in Lebedev-Laikov.F by translating the original C-routines kindly +# provided by Dmitri Laikov (Moscow State University, Moscow, Russia). We +# are in debt to Dmitri Laikov for giving us permission to make these routines +# publically available. +# +# Huub van Dam +# Daresbury Laboratory, Daresbury, United Kingdom +# April, 2000 +# +# References +# ========== +# +# [1] V.I. Lebedev, and D.N. Laikov +# "A quadrature formula for the sphere of the 131st +# algebraic order of accuracy" +# Doklady Mathematics, Vol. 59, No. 3, 1999, pp. 477-481. +# +#ccgk: and the following comments and references for the original of the subroutine SphGenOh: +# +# chvd +# chvd This subroutine is part of a set of subroutines that generate +# chvd Lebedev grids [1-6] for integration on a sphere. The original +# chvd C-code [1] was kindly provided by Dr. Dmitri N. 
Laikov and +# chvd translated into fortran by Dr. Christoph van Wuellen. +# chvd This subroutine was translated from C to fortran77 by hand. +# chvd +# chvd Users of this code are asked to include reference [1] in their +# chvd publications, and in the user- and programmers-manuals +# chvd describing their codes. +# chvd +# chvd This code was distributed through CCL (http://www.ccl.net/). +# chvd +# chvd [1] V.I. Lebedev, and D.N. Laikov +# chvd "A quadrature formula for the sphere of the 131st +# chvd algebraic order of accuracy" +# chvd Doklady Mathematics, Vol. 59, No. 3, 1999, pp. 477-481. +# chvd +# chvd [2] V.I. Lebedev +# chvd "A quadrature formula for the sphere of 59th algebraic +# chvd order of accuracy" +# chvd Russian Acad. Sci. Dokl. Math., Vol. 50, 1995, pp. 283-286. +# chvd +# chvd [3] V.I. Lebedev, and A.L. Skorokhodov +# chvd "Quadrature formulas of orders 41, 47, and 53 for the sphere" +# chvd Russian Acad. Sci. Dokl. Math., Vol. 45, 1992, pp. 587-592. +# chvd +# chvd [4] V.I. Lebedev +# chvd "Spherical quadrature formulas exact to orders 25-29" +# chvd Siberian Mathematical Journal, Vol. 18, 1977, pp. 99-107. +# chvd +# chvd [5] V.I. Lebedev +# chvd "Quadratures on a sphere" +# chvd Computational Mathematics and Mathematical Physics, Vol. 16, +# chvd 1976, pp. 10-24. +# chvd +# chvd [6] V.I. Lebedev +# chvd "Values of the nodes and weights of ninth to seventeenth +# chvd order Gauss-Markov quadrature formulae invariant under the +# chvd octahedron group with inversion" +# chvd Computational Mathematics and Mathematical Physics, Vol. 15, +# chvd 1975, pp. 44-51. +# chvd +# cvw +# cvw Given a point on a sphere (specified by a and b), generate all +# cvw the equivalent points under Oh symmetry, making grid points with +# cvw weight v. +# cvw The variable num is increased by the number of different points +# cvw generated. +# cvw +# cvw Depending on code, there are 6...48 different but equivalent +# cvw points. 
+# cvw +# cvw code=1: (0,0,1) etc ( 6 points) +# cvw code=2: (0,a,a) etc, a=1/sqrt(2) ( 12 points) +# cvw code=3: (a,a,a) etc, a=1/sqrt(3) ( 8 points) +# cvw code=4: (a,a,b) etc, b=sqrt(1-2 a^2) ( 24 points) +# cvw code=5: (a,b,0) etc, b=sqrt(1-a^2), a input ( 24 points) +# cvw code=6: (a,b,c) etc, c=sqrt(1-a^2-b^2), a/b input ( 48 points) +# cvw + +import numpy as np +from functools import lru_cache + +@lru_cache(maxsize=500) +def SphGenOh(code, a, b, v): + if code == 0: + a = 1.0 + g = np.array(( + # pos/x pos/y pos/z weight + a, 0., 0., v, + -a, 0., 0., v, + 0., a, 0., v, + 0., -a, 0., v, + 0., 0., a, v, + 0., 0., -a, v, + )).reshape(6, 4) + elif code == 1: + a = np.sqrt(0.5) + g = np.array(( + # pos/x pos/y pos/z weight + 0., a, a, v, + 0., -a, a, v, + 0., a, -a, v, + 0., -a, -a, v, + a, 0., a, v, + -a, 0., a, v, + a, 0., -a, v, + -a, 0., -a, v, + a, a, 0., v, + -a, a, 0., v, + a, -a, 0., v, + -a, -a, 0., v, + )).reshape(12, 4) + elif code == 2: + a = np.sqrt(1./3.) + g = np.array(( + # pos/x pos/y pos/z weight + a, a, a, v, + -a, a, a, v, + a, -a, a, v, + -a, -a, a, v, + a, a, -a, v, + -a, a, -a, v, + a, -a, -a, v, + -a, -a, -a, v, + )).reshape(8, 4) + elif code == 3: + b = np.sqrt(1. - 2.*a*a) + g = np.array(( + # pos/x pos/y pos/z weight + a, a, b, v, + -a, a, b, v, + a, -a, b, v, + -a, -a, b, v, + a, a, -b, v, + -a, a, -b, v, + a, -a, -b, v, + -a, -a, -b, v, + a, b, a, v, + -a, b, a, v, + a, -b, a, v, + -a, -b, a, v, + a, b, -a, v, + -a, b, -a, v, + a, -b, -a, v, + -a, -b, -a, v, + b, a, a, v, + -b, a, a, v, + b, -a, a, v, + -b, -a, a, v, + b, a, -a, v, + -b, a, -a, v, + b, -a, -a, v, + -b, -a, -a, v, + )).reshape(24, 4) + elif code == 4: + b = np.sqrt(1. 
- a*a) + g = np.array(( + # pos/x pos/y pos/z weight + a, b, 0., v, + -a, b, 0., v, + a, -b, 0., v, + -a, -b, 0., v, + b, a, 0., v, + -b, a, 0., v, + b, -a, 0., v, + -b, -a, 0., v, + a, 0., b, v, + -a, 0., b, v, + a, 0., -b, v, + -a, 0., -b, v, + b, 0., a, v, + -b, 0., a, v, + b, 0., -a, v, + -b, 0., -a, v, + 0., a, b, v, + 0., -a, b, v, + 0., a, -b, v, + 0., -a, -b, v, + 0., b, a, v, + 0., -b, a, v, + 0., b, -a, v, + 0., -b, -a, v, + )).reshape(24, 4) + elif code == 5: + c = np.sqrt(1. - a*a - b*b) + g = np.array(( + # pos/x pos/y pos/z weight + a, b, c, v, + -a, b, c, v, + a, -b, c, v, + -a, -b, c, v, + a, b, -c, v, + -a, b, -c, v, + a, -b, -c, v, + -a, -b, -c, v, + a, c, b, v, + -a, c, b, v, + a, -c, b, v, + -a, -c, b, v, + a, c, -b, v, + -a, c, -b, v, + a, -c, -b, v, + -a, -c, -b, v, + b, a, c, v, + -b, a, c, v, + b, -a, c, v, + -b, -a, c, v, + b, a, -c, v, + -b, a, -c, v, + b, -a, -c, v, + -b, -a, -c, v, + b, c, a, v, + -b, c, a, v, + b, -c, a, v, + -b, -c, a, v, + b, c, -a, v, + -b, c, -a, v, + b, -c, -a, v, + -b, -c, -a, v, + c, a, b, v, + -c, a, b, v, + c, -a, b, v, + -c, -a, b, v, + c, a, -b, v, + -c, a, -b, v, + c, -a, -b, v, + -c, -a, -b, v, + c, b, a, v, + -c, b, a, v, + c, -b, a, v, + -c, -b, a, v, + c, b, -a, v, + -c, b, -a, v, + c, -b, -a, v, + -c, -b, -a, v, + )).reshape(48, 4) + return g + + +def MakeAngularGrid_6(): + grids = [] + a = 0 + b = 0 + v = 0.1666666666666667e+0 + grids.append(SphGenOh(0, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_14(): + grids = [] + a = 0 + b = 0 + v = 0.6666666666666667e-1 + grids.append(SphGenOh(0, a, b, v)) + v = 0.7500000000000000e-1 + grids.append(SphGenOh(2, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_26(): + grids = [] + a = 0 + b = 0 + v = 0.4761904761904762e-1 + grids.append(SphGenOh(0, a, b, v)) + v = 0.3809523809523810e-1 + grids.append(SphGenOh(1, a, b, v)) + v = 0.3214285714285714e-1 + grids.append(SphGenOh(2, a, b, v)) + return np.vstack(grids) + + +def 
MakeAngularGrid_38(): + grids = [] + a = 0 + b = 0 + v = 0.9523809523809524e-2 + grids.append(SphGenOh(0, a, b, v)) + v = 0.3214285714285714e-1 + grids.append(SphGenOh(2, a, b, v)) + a = 0.4597008433809831e+0 + v = 0.2857142857142857e-1 + grids.append(SphGenOh(4, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_50(): + grids = [] + a = 0 + b = 0 + v = 0.1269841269841270e-1 + grids.append(SphGenOh(0, a, b, v)) + v = 0.2257495590828924e-1 + grids.append(SphGenOh(1, a, b, v)) + v = 0.2109375000000000e-1 + grids.append(SphGenOh(2, a, b, v)) + a = 0.3015113445777636e+0 + v = 0.2017333553791887e-1 + grids.append(SphGenOh(3, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_74(): + grids = [] + a = 0 + b = 0 + v = 0.5130671797338464e-3 + grids.append(SphGenOh(0, a, b, v)) + v = 0.1660406956574204e-1 + grids.append(SphGenOh(1, a, b, v)) + v = -0.2958603896103896e-1 + grids.append(SphGenOh(2, a, b, v)) + a = 0.4803844614152614e+0 + v = 0.2657620708215946e-1 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3207726489807764e+0 + v = 0.1652217099371571e-1 + grids.append(SphGenOh(4, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_86(): + grids = [] + a = 0 + b = 0 + v = 0.1154401154401154e-1 + grids.append(SphGenOh(0, a, b, v)) + v = 0.1194390908585628e-1 + grids.append(SphGenOh(2, a, b, v)) + a = 0.3696028464541502e+0 + v = 0.1111055571060340e-1 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6943540066026664e+0 + v = 0.1187650129453714e-1 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3742430390903412e+0 + v = 0.1181230374690448e-1 + grids.append(SphGenOh(4, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_110(): + grids = [] + a = 0 + b = 0 + v = 0.3828270494937162e-2 + grids.append(SphGenOh(0, a, b, v)) + v = 0.9793737512487512e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.1851156353447362e+0 + v = 0.8211737283191111e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6904210483822922e+0 + v = 0.9942814891178103e-2 + 
grids.append(SphGenOh(3, a, b, v)) + a = 0.3956894730559419e+0 + v = 0.9595471336070963e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4783690288121502e+0 + v = 0.9694996361663028e-2 + grids.append(SphGenOh(4, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_146(): + grids = [] + a = 0 + b = 0 + v = 0.5996313688621381e-3 + grids.append(SphGenOh(0, a, b, v)) + v = 0.7372999718620756e-2 + grids.append(SphGenOh(1, a, b, v)) + v = 0.7210515360144488e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.6764410400114264e+0 + v = 0.7116355493117555e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4174961227965453e+0 + v = 0.6753829486314477e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1574676672039082e+0 + v = 0.7574394159054034e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1403553811713183e+0 + b = 0.4493328323269557e+0 + v = 0.6991087353303262e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_170(): + grids = [] + a = 0 + b = 0 + v = 0.5544842902037365e-2 + grids.append(SphGenOh(0, a, b, v)) + v = 0.6071332770670752e-2 + grids.append(SphGenOh(1, a, b, v)) + v = 0.6383674773515093e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.2551252621114134e+0 + v = 0.5183387587747790e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6743601460362766e+0 + v = 0.6317929009813725e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4318910696719410e+0 + v = 0.6201670006589077e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2613931360335988e+0 + v = 0.5477143385137348e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4990453161796037e+0 + b = 0.1446630744325115e+0 + v = 0.5968383987681156e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_194(): + grids = [] + a = 0 + b = 0 + v = 0.1782340447244611e-2 + grids.append(SphGenOh(0, a, b, v)) + v = 0.5716905949977102e-2 + grids.append(SphGenOh(1, a, b, v)) + v = 0.5573383178848738e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.6712973442695226e+0 + v = 
0.5608704082587997e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2892465627575439e+0 + v = 0.5158237711805383e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4446933178717437e+0 + v = 0.5518771467273614e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1299335447650067e+0 + v = 0.4106777028169394e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3457702197611283e+0 + v = 0.5051846064614808e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1590417105383530e+0 + b = 0.8360360154824589e+0 + v = 0.5530248916233094e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_230(): + grids = [] + a = 0 + b = 0 + v = -0.5522639919727325e-1 + grids.append(SphGenOh(0, a, b, v)) + v = 0.4450274607445226e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.4492044687397611e+0 + v = 0.4496841067921404e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2520419490210201e+0 + v = 0.5049153450478750e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6981906658447242e+0 + v = 0.3976408018051883e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6587405243460960e+0 + v = 0.4401400650381014e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4038544050097660e-1 + v = 0.1724544350544401e-1 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5823842309715585e+0 + v = 0.4231083095357343e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3545877390518688e+0 + v = 0.5198069864064399e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2272181808998187e+0 + b = 0.4864661535886647e+0 + v = 0.4695720972568883e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_266(): + grids = [] + a = 0 + b = 0 + v = -0.1313769127326952e-2 + grids.append(SphGenOh(0, a, b, v)) + v = -0.2522728704859336e-2 + grids.append(SphGenOh(1, a, b, v)) + v = 0.4186853881700583e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.7039373391585475e+0 + v = 0.5315167977810885e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1012526248572414e+0 + v = 0.4047142377086219e-2 + 
grids.append(SphGenOh(3, a, b, v)) + a = 0.4647448726420539e+0 + v = 0.4112482394406990e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3277420654971629e+0 + v = 0.3595584899758782e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6620338663699974e+0 + v = 0.4256131351428158e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.8506508083520399e+0 + v = 0.4229582700647240e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3233484542692899e+0 + b = 0.1153112011009701e+0 + v = 0.4080914225780505e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2314790158712601e+0 + b = 0.5244939240922365e+0 + v = 0.4071467593830964e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_302(): + grids = [] + a = 0 + b = 0 + v = 0.8545911725128148e-3 + grids.append(SphGenOh(0, a, b, v)) + v = 0.3599119285025571e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.3515640345570105e+0 + v = 0.3449788424305883e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6566329410219612e+0 + v = 0.3604822601419882e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4729054132581005e+0 + v = 0.3576729661743367e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.9618308522614784e-1 + v = 0.2352101413689164e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2219645236294178e+0 + v = 0.3108953122413675e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7011766416089545e+0 + v = 0.3650045807677255e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2644152887060663e+0 + v = 0.2982344963171804e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5718955891878961e+0 + v = 0.3600820932216460e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2510034751770465e+0 + b = 0.8000727494073952e+0 + v = 0.3571540554273387e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1233548532583327e+0 + b = 0.4127724083168531e+0 + v = 0.3392312205006170e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_350(): + grids = [] + a = 0 + b = 0 + v = 0.3006796749453936e-2 + grids.append(SphGenOh(0, a, b, 
v)) + v = 0.3050627745650771e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.7068965463912316e+0 + v = 0.1621104600288991e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4794682625712025e+0 + v = 0.3005701484901752e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1927533154878019e+0 + v = 0.2990992529653774e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6930357961327123e+0 + v = 0.2982170644107595e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3608302115520091e+0 + v = 0.2721564237310992e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6498486161496169e+0 + v = 0.3033513795811141e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1932945013230339e+0 + v = 0.3007949555218533e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3800494919899303e+0 + v = 0.2881964603055307e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2899558825499574e+0 + b = 0.7934537856582316e+0 + v = 0.2958357626535696e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.9684121455103957e-1 + b = 0.8280801506686862e+0 + v = 0.3036020026407088e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1833434647041659e+0 + b = 0.9074658265305127e+0 + v = 0.2832187403926303e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_434(): + grids = [] + a = 0 + b = 0 + v = 0.5265897968224436e-3 + grids.append(SphGenOh(0, a, b, v)) + v = 0.2548219972002607e-2 + grids.append(SphGenOh(1, a, b, v)) + v = 0.2512317418927307e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.6909346307509111e+0 + v = 0.2530403801186355e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1774836054609158e+0 + v = 0.2014279020918528e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4914342637784746e+0 + v = 0.2501725168402936e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6456664707424256e+0 + v = 0.2513267174597564e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2861289010307638e+0 + v = 0.2302694782227416e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7568084367178018e-1 + v = 0.1462495621594614e-2 + 
grids.append(SphGenOh(3, a, b, v)) + a = 0.3927259763368002e+0 + v = 0.2445373437312980e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.8818132877794288e+0 + v = 0.2417442375638981e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.9776428111182649e+0 + v = 0.1910951282179532e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2054823696403044e+0 + b = 0.8689460322872412e+0 + v = 0.2416930044324775e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5905157048925271e+0 + b = 0.7999278543857286e+0 + v = 0.2512236854563495e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5550152361076807e+0 + b = 0.7717462626915901e+0 + v = 0.2496644054553086e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.9371809858553722e+0 + b = 0.3344363145343455e+0 + v = 0.2236607760437849e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_590(): + grids = [] + a = 0 + b = 0 + v = 0.3095121295306187e-3 + grids.append(SphGenOh(0, a, b, v)) + v = 0.1852379698597489e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.7040954938227469e+0 + v = 0.1871790639277744e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6807744066455243e+0 + v = 0.1858812585438317e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6372546939258752e+0 + v = 0.1852028828296213e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5044419707800358e+0 + v = 0.1846715956151242e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4215761784010967e+0 + v = 0.1818471778162769e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3317920736472123e+0 + v = 0.1749564657281154e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2384736701421887e+0 + v = 0.1617210647254411e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1459036449157763e+0 + v = 0.1384737234851692e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6095034115507196e-1 + v = 0.9764331165051050e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6116843442009876e+0 + v = 0.1857161196774078e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3964755348199858e+0 + v = 
0.1705153996395864e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1724782009907724e+0 + v = 0.1300321685886048e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5610263808622060e+0 + b = 0.3518280927733519e+0 + v = 0.1842866472905286e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4742392842551980e+0 + b = 0.2634716655937950e+0 + v = 0.1802658934377451e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5984126497885380e+0 + b = 0.1816640840360209e+0 + v = 0.1849830560443660e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3791035407695563e+0 + b = 0.1720795225656878e+0 + v = 0.1713904507106709e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2778673190586244e+0 + b = 0.8213021581932511e-1 + v = 0.1555213603396808e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5033564271075117e+0 + b = 0.8999205842074875e-1 + v = 0.1802239128008525e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_770(): + grids = [] + a = 0 + b = 0 + v = 0.2192942088181184e-3 + grids.append(SphGenOh(0, a, b, v)) + v = 0.1436433617319080e-2 + grids.append(SphGenOh(1, a, b, v)) + v = 0.1421940344335877e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.5087204410502360e-1 + v = 0.6798123511050502e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1228198790178831e+0 + v = 0.9913184235294912e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2026890814408786e+0 + v = 0.1180207833238949e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2847745156464294e+0 + v = 0.1296599602080921e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3656719078978026e+0 + v = 0.1365871427428316e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4428264886713469e+0 + v = 0.1402988604775325e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5140619627249735e+0 + v = 0.1418645563595609e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6306401219166803e+0 + v = 0.1421376741851662e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6716883332022612e+0 + v = 0.1423996475490962e-2 + grids.append(SphGenOh(3, a, b, v)) + 
a = 0.6979792685336881e+0 + v = 0.1431554042178567e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1446865674195309e+0 + v = 0.9254401499865368e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3390263475411216e+0 + v = 0.1250239995053509e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5335804651263506e+0 + v = 0.1394365843329230e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6944024393349413e-1 + b = 0.2355187894242326e+0 + v = 0.1127089094671749e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2269004109529460e+0 + b = 0.4102182474045730e+0 + v = 0.1345753760910670e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.8025574607775339e-1 + b = 0.6214302417481605e+0 + v = 0.1424957283316783e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1467999527896572e+0 + b = 0.3245284345717394e+0 + v = 0.1261523341237750e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1571507769824727e+0 + b = 0.5224482189696630e+0 + v = 0.1392547106052696e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2365702993157246e+0 + b = 0.6017546634089558e+0 + v = 0.1418761677877656e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.7714815866765732e-1 + b = 0.4346575516141163e+0 + v = 0.1338366684479554e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3062936666210730e+0 + b = 0.4908826589037616e+0 + v = 0.1393700862676131e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3822477379524787e+0 + b = 0.5648768149099500e+0 + v = 0.1415914757466932e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_974(): + grids = [] + a = 0 + b = 0 + v = 0.1438294190527431e-3 + grids.append(SphGenOh(0, a, b, v)) + v = 0.1125772288287004e-2 + grids.append(SphGenOh(2, a, b, v)) + a = 0.4292963545341347e-1 + v = 0.4948029341949241e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1051426854086404e+0 + v = 0.7357990109125470e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1750024867623087e+0 + v = 0.8889132771304384e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2477653379650257e+0 + v = 
0.9888347838921435e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3206567123955957e+0 + v = 0.1053299681709471e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3916520749849983e+0 + v = 0.1092778807014578e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4590825874187624e+0 + v = 0.1114389394063227e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5214563888415861e+0 + v = 0.1123724788051555e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6253170244654199e+0 + v = 0.1125239325243814e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6637926744523170e+0 + v = 0.1126153271815905e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6910410398498301e+0 + v = 0.1130286931123841e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7052907007457760e+0 + v = 0.1134986534363955e-2 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1236686762657990e+0 + v = 0.6823367927109931e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2940777114468387e+0 + v = 0.9454158160447096e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4697753849207649e+0 + v = 0.1074429975385679e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6334563241139567e+0 + v = 0.1129300086569132e-2 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5974048614181342e-1 + b = 0.2029128752777523e+0 + v = 0.8436884500901954e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1375760408473636e+0 + b = 0.4602621942484054e+0 + v = 0.1075255720448885e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3391016526336286e+0 + b = 0.5030673999662036e+0 + v = 0.1108577236864462e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1271675191439820e+0 + b = 0.2817606422442134e+0 + v = 0.9566475323783357e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2693120740413512e+0 + b = 0.4331561291720157e+0 + v = 0.1080663250717391e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1419786452601918e+0 + b = 0.6256167358580814e+0 + v = 0.1126797131196295e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6709284600738255e-1 + b = 0.3798395216859157e+0 + v = 0.1022568715358061e-2 + 
grids.append(SphGenOh(5, a, b, v)) + a = 0.7057738183256172e-1 + b = 0.5517505421423520e+0 + v = 0.1108960267713108e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2783888477882155e+0 + b = 0.6029619156159187e+0 + v = 0.1122790653435766e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1979578938917407e+0 + b = 0.3589606329589096e+0 + v = 0.1032401847117460e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2087307061103274e+0 + b = 0.5348666438135476e+0 + v = 0.1107249382283854e-2 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4055122137872836e+0 + b = 0.5674997546074373e+0 + v = 0.1121780048519972e-2 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_1202(): + grids = [] + a = 0 + b = 0 + v = 0.1105189233267572e-3 + grids.append(SphGenOh(0, a, b, v)) + v = 0.9205232738090741e-3 + grids.append(SphGenOh(1, a, b, v)) + v = 0.9133159786443561e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.3712636449657089e-1 + v = 0.3690421898017899e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.9140060412262223e-1 + v = 0.5603990928680660e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1531077852469906e+0 + v = 0.6865297629282609e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2180928891660612e+0 + v = 0.7720338551145630e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2839874532200175e+0 + v = 0.8301545958894795e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3491177600963764e+0 + v = 0.8686692550179628e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4121431461444309e+0 + v = 0.8927076285846890e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4718993627149127e+0 + v = 0.9060820238568219e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5273145452842337e+0 + v = 0.9119777254940867e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6209475332444019e+0 + v = 0.9128720138604181e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6569722711857291e+0 + v = 0.9130714935691735e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6841788309070143e+0 + v = 
0.9152873784554116e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7012604330123631e+0 + v = 0.9187436274321654e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1072382215478166e+0 + v = 0.5176977312965694e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2582068959496968e+0 + v = 0.7331143682101417e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4172752955306717e+0 + v = 0.8463232836379928e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5700366911792503e+0 + v = 0.9031122694253992e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.9827986018263947e+0 + b = 0.1771774022615325e+0 + v = 0.6485778453163257e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.9624249230326228e+0 + b = 0.2475716463426288e+0 + v = 0.7435030910982369e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.9402007994128811e+0 + b = 0.3354616289066489e+0 + v = 0.7998527891839054e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.9320822040143202e+0 + b = 0.3173615246611977e+0 + v = 0.8101731497468018e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.9043674199393299e+0 + b = 0.4090268427085357e+0 + v = 0.8483389574594331e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.8912407560074747e+0 + b = 0.3854291150669224e+0 + v = 0.8556299257311812e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.8676435628462708e+0 + b = 0.4932221184851285e+0 + v = 0.8803208679738260e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.8581979986041619e+0 + b = 0.4785320675922435e+0 + v = 0.8811048182425720e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.8396753624049856e+0 + b = 0.4507422593157064e+0 + v = 0.8850282341265444e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.8165288564022188e+0 + b = 0.5632123020762100e+0 + v = 0.9021342299040653e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.8015469370783529e+0 + b = 0.5434303569693900e+0 + v = 0.9010091677105086e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.7773563069070351e+0 + b = 0.5123518486419871e+0 + v = 0.9022692938426915e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 
0.7661621213900394e+0 + b = 0.6394279634749102e+0 + v = 0.9158016174693465e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.7553584143533510e+0 + b = 0.6269805509024392e+0 + v = 0.9131578003189435e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.7344305757559503e+0 + b = 0.6031161693096310e+0 + v = 0.9107813579482705e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.7043837184021765e+0 + b = 0.5693702498468441e+0 + v = 0.9105760258970126e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_1454(): + grids = [] + a = 0 + b = 0 + v = 0.7777160743261247e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.7557646413004701e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.3229290663413854e-1 + v = 0.2841633806090617e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.8036733271462222e-1 + v = 0.4374419127053555e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1354289960531653e+0 + v = 0.5417174740872172e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1938963861114426e+0 + v = 0.6148000891358593e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2537343715011275e+0 + v = 0.6664394485800705e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3135251434752570e+0 + v = 0.7025039356923220e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3721558339375338e+0 + v = 0.7268511789249627e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4286809575195696e+0 + v = 0.7422637534208629e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4822510128282994e+0 + v = 0.7509545035841214e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5320679333566263e+0 + v = 0.7548535057718401e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6172998195394274e+0 + v = 0.7554088969774001e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6510679849127481e+0 + v = 0.7553147174442808e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6777315251687360e+0 + v = 0.7564767653292297e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6963109410648741e+0 + v = 0.7587991808518730e-3 + grids.append(SphGenOh(3, a, b, v)) + 
a = 0.7058935009831749e+0 + v = 0.7608261832033027e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.9955546194091857e+0 + v = 0.4021680447874916e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.9734115901794209e+0 + v = 0.5804871793945964e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.9275693732388626e+0 + v = 0.6792151955945159e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.8568022422795103e+0 + v = 0.7336741211286294e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.7623495553719372e+0 + v = 0.7581866300989608e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5707522908892223e+0 + b = 0.4387028039889501e+0 + v = 0.7538257859800743e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5196463388403083e+0 + b = 0.3858908414762617e+0 + v = 0.7483517247053123e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4646337531215351e+0 + b = 0.3301937372343854e+0 + v = 0.7371763661112059e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4063901697557691e+0 + b = 0.2725423573563777e+0 + v = 0.7183448895756934e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3456329466643087e+0 + b = 0.2139510237495250e+0 + v = 0.6895815529822191e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2831395121050332e+0 + b = 0.1555922309786647e+0 + v = 0.6480105801792886e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2197682022925330e+0 + b = 0.9892878979686097e-1 + v = 0.5897558896594636e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1564696098650355e+0 + b = 0.4598642910675510e-1 + v = 0.5095708849247346e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6027356673721295e+0 + b = 0.3376625140173426e+0 + v = 0.7536906428909755e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5496032320255096e+0 + b = 0.2822301309727988e+0 + v = 0.7472505965575118e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4921707755234567e+0 + b = 0.2248632342592540e+0 + v = 0.7343017132279698e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4309422998598483e+0 + b = 0.1666224723456479e+0 + v = 0.7130871582177445e-3 + grids.append(SphGenOh(5, 
a, b, v)) + a = 0.3664108182313672e+0 + b = 0.1086964901822169e+0 + v = 0.6817022032112776e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2990189057758436e+0 + b = 0.5251989784120085e-1 + v = 0.6380941145604121e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6268724013144998e+0 + b = 0.2297523657550023e+0 + v = 0.7550381377920310e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5707324144834607e+0 + b = 0.1723080607093800e+0 + v = 0.7478646640144802e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5096360901960365e+0 + b = 0.1140238465390513e+0 + v = 0.7335918720601220e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4438729938312456e+0 + b = 0.5611522095882537e-1 + v = 0.7110120527658118e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6419978471082389e+0 + b = 0.1164174423140873e+0 + v = 0.7571363978689501e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5817218061802611e+0 + b = 0.5797589531445219e-1 + v = 0.7489908329079234e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_1730(): + grids = [] + a = 0 + b = 0 + v = 0.6309049437420976e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.6398287705571748e-3 + grids.append(SphGenOh(1, a, b, v)) + v = 0.6357185073530720e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.2860923126194662e-1 + v = 0.2221207162188168e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7142556767711522e-1 + v = 0.3475784022286848e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1209199540995559e+0 + v = 0.4350742443589804e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1738673106594379e+0 + v = 0.4978569136522127e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2284645438467734e+0 + v = 0.5435036221998053e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2834807671701512e+0 + v = 0.5765913388219542e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3379680145467339e+0 + v = 0.6001200359226003e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3911355454819537e+0 + v = 0.6162178172717512e-3 + grids.append(SphGenOh(3, a, 
b, v)) + a = 0.4422860353001403e+0 + v = 0.6265218152438485e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4907781568726057e+0 + v = 0.6323987160974212e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5360006153211468e+0 + v = 0.6350767851540569e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6142105973596603e+0 + v = 0.6354362775297107e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6459300387977504e+0 + v = 0.6352302462706235e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6718056125089225e+0 + v = 0.6358117881417972e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6910888533186254e+0 + v = 0.6373101590310117e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7030467416823252e+0 + v = 0.6390428961368665e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.8354951166354646e-1 + v = 0.3186913449946576e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2050143009099486e+0 + v = 0.4678028558591711e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3370208290706637e+0 + v = 0.5538829697598626e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4689051484233963e+0 + v = 0.6044475907190476e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5939400424557334e+0 + v = 0.6313575103509012e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1394983311832261e+0 + b = 0.4097581162050343e-1 + v = 0.4078626431855630e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1967999180485014e+0 + b = 0.8851987391293348e-1 + v = 0.4759933057812725e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2546183732548967e+0 + b = 0.1397680182969819e+0 + v = 0.5268151186413440e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3121281074713875e+0 + b = 0.1929452542226526e+0 + v = 0.5643048560507316e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3685981078502492e+0 + b = 0.2467898337061562e+0 + v = 0.5914501076613073e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4233760321547856e+0 + b = 0.3003104124785409e+0 + v = 0.6104561257874195e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4758671236059246e+0 + b = 0.3526684328175033e+0 + 
v = 0.6230252860707806e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5255178579796463e+0 + b = 0.4031134861145713e+0 + v = 0.6305618761760796e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5718025633734589e+0 + b = 0.4509426448342351e+0 + v = 0.6343092767597889e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2686927772723415e+0 + b = 0.4711322502423248e-1 + v = 0.5176268945737826e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3306006819904809e+0 + b = 0.9784487303942695e-1 + v = 0.5564840313313692e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3904906850594983e+0 + b = 0.1505395810025273e+0 + v = 0.5856426671038980e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4479957951904390e+0 + b = 0.2039728156296050e+0 + v = 0.6066386925777091e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5027076848919780e+0 + b = 0.2571529941121107e+0 + v = 0.6208824962234458e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5542087392260217e+0 + b = 0.3092191375815670e+0 + v = 0.6296314297822907e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6020850887375187e+0 + b = 0.3593807506130276e+0 + v = 0.6340423756791859e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4019851409179594e+0 + b = 0.5063389934378671e-1 + v = 0.5829627677107342e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4635614567449800e+0 + b = 0.1032422269160612e+0 + v = 0.6048693376081110e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5215860931591575e+0 + b = 0.1566322094006254e+0 + v = 0.6202362317732461e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5758202499099271e+0 + b = 0.2098082827491099e+0 + v = 0.6299005328403779e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6259893683876795e+0 + b = 0.2618824114553391e+0 + v = 0.6347722390609353e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5313795124811891e+0 + b = 0.5263245019338556e-1 + v = 0.6203778981238834e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5893317955931995e+0 + b = 0.1061059730982005e+0 + v = 0.6308414671239979e-3 + grids.append(SphGenOh(5, a, b, v)) 
+ a = 0.6426246321215801e+0 + b = 0.1594171564034221e+0 + v = 0.6362706466959498e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6511904367376113e+0 + b = 0.5354789536565540e-1 + v = 0.6375414170333233e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_2030(): + grids = [] + a = 0 + b = 0 + v = 0.4656031899197431e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.5421549195295507e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.2540835336814348e-1 + v = 0.1778522133346553e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6399322800504915e-1 + v = 0.2811325405682796e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1088269469804125e+0 + v = 0.3548896312631459e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1570670798818287e+0 + v = 0.4090310897173364e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2071163932282514e+0 + v = 0.4493286134169965e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2578914044450844e+0 + v = 0.4793728447962723e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3085687558169623e+0 + v = 0.5015415319164265e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3584719706267024e+0 + v = 0.5175127372677937e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4070135594428709e+0 + v = 0.5285522262081019e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4536618626222638e+0 + v = 0.5356832703713962e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4979195686463577e+0 + v = 0.5397914736175170e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5393075111126999e+0 + v = 0.5416899441599930e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6115617676843916e+0 + v = 0.5419308476889938e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6414308435160159e+0 + v = 0.5416936902030596e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6664099412721607e+0 + v = 0.5419544338703164e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6859161771214913e+0 + v = 0.5428983656630975e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6993625593503890e+0 + v = 
0.5442286500098193e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7062393387719380e+0 + v = 0.5452250345057301e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7479028168349763e-1 + v = 0.2568002497728530e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1848951153969366e+0 + v = 0.3827211700292145e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3059529066581305e+0 + v = 0.4579491561917824e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4285556101021362e+0 + v = 0.5042003969083574e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5468758653496526e+0 + v = 0.5312708889976025e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6565821978343439e+0 + v = 0.5438401790747117e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1253901572367117e+0 + b = 0.3681917226439641e-1 + v = 0.3316041873197344e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1775721510383941e+0 + b = 0.7982487607213301e-1 + v = 0.3899113567153771e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2305693358216114e+0 + b = 0.1264640966592335e+0 + v = 0.4343343327201309e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2836502845992063e+0 + b = 0.1751585683418957e+0 + v = 0.4679415262318919e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3361794746232590e+0 + b = 0.2247995907632670e+0 + v = 0.4930847981631031e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3875979172264824e+0 + b = 0.2745299257422246e+0 + v = 0.5115031867540091e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4374019316999074e+0 + b = 0.3236373482441118e+0 + v = 0.5245217148457367e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4851275843340022e+0 + b = 0.3714967859436741e+0 + v = 0.5332041499895321e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5303391803806868e+0 + b = 0.4175353646321745e+0 + v = 0.5384583126021542e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5726197380596287e+0 + b = 0.4612084406355461e+0 + v = 0.5411067210798852e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2431520732564863e+0 + b = 0.4258040133043952e-1 + v = 
0.4259797391468714e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3002096800895869e+0 + b = 0.8869424306722721e-1 + v = 0.4604931368460021e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3558554457457432e+0 + b = 0.1368811706510655e+0 + v = 0.4871814878255202e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4097782537048887e+0 + b = 0.1860739985015033e+0 + v = 0.5072242910074885e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4616337666067458e+0 + b = 0.2354235077395853e+0 + v = 0.5217069845235350e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5110707008417874e+0 + b = 0.2842074921347011e+0 + v = 0.5315785966280310e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5577415286163795e+0 + b = 0.3317784414984102e+0 + v = 0.5376833708758905e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6013060431366950e+0 + b = 0.3775299002040700e+0 + v = 0.5408032092069521e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3661596767261781e+0 + b = 0.4599367887164592e-1 + v = 0.4842744917904866e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4237633153506581e+0 + b = 0.9404893773654421e-1 + v = 0.5048926076188130e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4786328454658452e+0 + b = 0.1431377109091971e+0 + v = 0.5202607980478373e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5305702076789774e+0 + b = 0.1924186388843570e+0 + v = 0.5309932388325743e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5793436224231788e+0 + b = 0.2411590944775190e+0 + v = 0.5377419770895208e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6247069017094747e+0 + b = 0.2886871491583605e+0 + v = 0.5411696331677717e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4874315552535204e+0 + b = 0.4804978774953206e-1 + v = 0.5197996293282420e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5427337322059053e+0 + b = 0.9716857199366665e-1 + v = 0.5311120836622945e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5943493747246700e+0 + b = 0.1465205839795055e+0 + v = 0.5384309319956951e-3 + grids.append(SphGenOh(5, a, b, v)) + a 
= 0.6421314033564943e+0 + b = 0.1953579449803574e+0 + v = 0.5421859504051886e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6020628374713980e+0 + b = 0.4916375015738108e-1 + v = 0.5390948355046314e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6529222529856881e+0 + b = 0.9861621540127005e-1 + v = 0.5433312705027845e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_2354(): + grids = [] + a = 0 + b = 0 + v = 0.3922616270665292e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.4703831750854424e-3 + grids.append(SphGenOh(1, a, b, v)) + v = 0.4678202801282136e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.2290024646530589e-1 + v = 0.1437832228979900e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5779086652271284e-1 + v = 0.2303572493577644e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.9863103576375984e-1 + v = 0.2933110752447454e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1428155792982185e+0 + v = 0.3402905998359838e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1888978116601463e+0 + v = 0.3759138466870372e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2359091682970210e+0 + v = 0.4030638447899798e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2831228833706171e+0 + v = 0.4236591432242211e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3299495857966693e+0 + v = 0.4390522656946746e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3758840802660796e+0 + v = 0.4502523466626247e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4204751831009480e+0 + v = 0.4580577727783541e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4633068518751051e+0 + v = 0.4631391616615899e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5039849474507313e+0 + v = 0.4660928953698676e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5421265793440747e+0 + v = 0.4674751807936953e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6092660230557310e+0 + v = 0.4676414903932920e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6374654204984869e+0 + v = 0.4674086492347870e-3 
+ grids.append(SphGenOh(3, a, b, v)) + a = 0.6615136472609892e+0 + v = 0.4674928539483207e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6809487285958127e+0 + v = 0.4680748979686447e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6952980021665196e+0 + v = 0.4690449806389040e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7041245497695400e+0 + v = 0.4699877075860818e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6744033088306065e-1 + v = 0.2099942281069176e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1678684485334166e+0 + v = 0.3172269150712804e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2793559049539613e+0 + v = 0.3832051358546523e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3935264218057639e+0 + v = 0.4252193818146985e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5052629268232558e+0 + v = 0.4513807963755000e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6107905315437531e+0 + v = 0.4657797469114178e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1135081039843524e+0 + b = 0.3331954884662588e-1 + v = 0.2733362800522836e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1612866626099378e+0 + b = 0.7247167465436538e-1 + v = 0.3235485368463559e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2100786550168205e+0 + b = 0.1151539110849745e+0 + v = 0.3624908726013453e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2592282009459942e+0 + b = 0.1599491097143677e+0 + v = 0.3925540070712828e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3081740561320203e+0 + b = 0.2058699956028027e+0 + v = 0.4156129781116235e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3564289781578164e+0 + b = 0.2521624953502911e+0 + v = 0.4330644984623263e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4035587288240703e+0 + b = 0.2982090785797674e+0 + v = 0.4459677725921312e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4491671196373903e+0 + b = 0.3434762087235733e+0 + v = 0.4551593004456795e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4928854782917489e+0 + b = 0.3874831357203437e+0 + v = 
0.4613341462749918e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5343646791958988e+0 + b = 0.4297814821746926e+0 + v = 0.4651019618269806e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5732683216530990e+0 + b = 0.4699402260943537e+0 + v = 0.4670249536100625e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2214131583218986e+0 + b = 0.3873602040643895e-1 + v = 0.3549555576441708e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2741796504750071e+0 + b = 0.8089496256902013e-1 + v = 0.3856108245249010e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3259797439149485e+0 + b = 0.1251732177620872e+0 + v = 0.4098622845756882e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3765441148826891e+0 + b = 0.1706260286403185e+0 + v = 0.4286328604268950e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4255773574530558e+0 + b = 0.2165115147300408e+0 + v = 0.4427802198993945e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4727795117058430e+0 + b = 0.2622089812225259e+0 + v = 0.4530473511488561e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5178546895819012e+0 + b = 0.3071721431296201e+0 + v = 0.4600805475703138e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5605141192097460e+0 + b = 0.3508998998801138e+0 + v = 0.4644599059958017e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6004763319352512e+0 + b = 0.3929160876166931e+0 + v = 0.4667274455712508e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3352842634946949e+0 + b = 0.4202563457288019e-1 + v = 0.4069360518020356e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3891971629814670e+0 + b = 0.8614309758870850e-1 + v = 0.4260442819919195e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4409875565542281e+0 + b = 0.1314500879380001e+0 + v = 0.4408678508029063e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4904893058592484e+0 + b = 0.1772189657383859e+0 + v = 0.4518748115548597e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5375056138769549e+0 + b = 0.2228277110050294e+0 + v = 0.4595564875375116e-3 + grids.append(SphGenOh(5, a, b, v)) + a 
= 0.5818255708669969e+0 + b = 0.2677179935014386e+0 + v = 0.4643988774315846e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6232334858144959e+0 + b = 0.3113675035544165e+0 + v = 0.4668827491646946e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4489485354492058e+0 + b = 0.4409162378368174e-1 + v = 0.4400541823741973e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5015136875933150e+0 + b = 0.8939009917748489e-1 + v = 0.4514512890193797e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5511300550512623e+0 + b = 0.1351806029383365e+0 + v = 0.4596198627347549e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5976720409858000e+0 + b = 0.1808370355053196e+0 + v = 0.4648659016801781e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6409956378989354e+0 + b = 0.2257852192301602e+0 + v = 0.4675502017157673e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5581222330827514e+0 + b = 0.4532173421637160e-1 + v = 0.4598494476455523e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6074705984161695e+0 + b = 0.9117488031840314e-1 + v = 0.4654916955152048e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6532272537379033e+0 + b = 0.1369294213140155e+0 + v = 0.4684709779505137e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6594761494500487e+0 + b = 0.4589901487275583e-1 + v = 0.4691445539106986e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_2702(): + grids = [] + a = 0 + b = 0 + v = 0.2998675149888161e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.4077860529495355e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.2065562538818703e-1 + v = 0.1185349192520667e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5250918173022379e-1 + v = 0.1913408643425751e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.8993480082038376e-1 + v = 0.2452886577209897e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1306023924436019e+0 + v = 0.2862408183288702e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1732060388531418e+0 + v = 0.3178032258257357e-3 + grids.append(SphGenOh(3, 
a, b, v)) + a = 0.2168727084820249e+0 + v = 0.3422945667633690e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2609528309173586e+0 + v = 0.3612790520235922e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3049252927938952e+0 + v = 0.3758638229818521e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3483484138084404e+0 + v = 0.3868711798859953e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3908321549106406e+0 + v = 0.3949429933189938e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4320210071894814e+0 + v = 0.4006068107541156e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4715824795890053e+0 + v = 0.4043192149672723e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5091984794078453e+0 + v = 0.4064947495808078e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5445580145650803e+0 + v = 0.4075245619813152e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6072575796841768e+0 + v = 0.4076423540893566e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6339484505755803e+0 + v = 0.4074280862251555e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6570718257486958e+0 + v = 0.4074163756012244e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6762557330090709e+0 + v = 0.4077647795071246e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6911161696923790e+0 + v = 0.4084517552782530e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7012841911659961e+0 + v = 0.4092468459224052e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7064559272410020e+0 + v = 0.4097872687240906e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6123554989894765e-1 + v = 0.1738986811745028e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1533070348312393e+0 + v = 0.2659616045280191e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2563902605244206e+0 + v = 0.3240596008171533e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3629346991663361e+0 + v = 0.3621195964432943e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4683949968987538e+0 + v = 0.3868838330760539e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5694479240657952e+0 + v = 
0.4018911532693111e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6634465430993955e+0 + v = 0.4089929432983252e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1033958573552305e+0 + b = 0.3034544009063584e-1 + v = 0.2279907527706409e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1473521412414395e+0 + b = 0.6618803044247135e-1 + v = 0.2715205490578897e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1924552158705967e+0 + b = 0.1054431128987715e+0 + v = 0.3057917896703976e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2381094362890328e+0 + b = 0.1468263551238858e+0 + v = 0.3326913052452555e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2838121707936760e+0 + b = 0.1894486108187886e+0 + v = 0.3537334711890037e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3291323133373415e+0 + b = 0.2326374238761579e+0 + v = 0.3700567500783129e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3736896978741460e+0 + b = 0.2758485808485768e+0 + v = 0.3825245372589122e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4171406040760013e+0 + b = 0.3186179331996921e+0 + v = 0.3918125171518296e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4591677985256915e+0 + b = 0.3605329796303794e+0 + v = 0.3984720419937579e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4994733831718418e+0 + b = 0.4012147253586509e+0 + v = 0.4029746003338211e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5377731830445096e+0 + b = 0.4403050025570692e+0 + v = 0.4057428632156627e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5737917830001331e+0 + b = 0.4774565904277483e+0 + v = 0.4071719274114857e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2027323586271389e+0 + b = 0.3544122504976147e-1 + v = 0.2990236950664119e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2516942375187273e+0 + b = 0.7418304388646328e-1 + v = 0.3262951734212878e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3000227995257181e+0 + b = 0.1150502745727186e+0 + v = 0.3482634608242413e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3474806691046342e+0 + b 
= 0.1571963371209364e+0 + v = 0.3656596681700892e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3938103180359209e+0 + b = 0.1999631877247100e+0 + v = 0.3791740467794218e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4387519590455703e+0 + b = 0.2428073457846535e+0 + v = 0.3894034450156905e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4820503960077787e+0 + b = 0.2852575132906155e+0 + v = 0.3968600245508371e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5234573778475101e+0 + b = 0.3268884208674639e+0 + v = 0.4019931351420050e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5627318647235282e+0 + b = 0.3673033321675939e+0 + v = 0.4052108801278599e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5996390607156954e+0 + b = 0.4061211551830290e+0 + v = 0.4068978613940934e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3084780753791947e+0 + b = 0.3860125523100059e-1 + v = 0.3454275351319704e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3589988275920223e+0 + b = 0.7928938987104867e-1 + v = 0.3629963537007920e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4078628415881973e+0 + b = 0.1212614643030087e+0 + v = 0.3770187233889873e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4549287258889735e+0 + b = 0.1638770827382693e+0 + v = 0.3878608613694378e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5000278512957279e+0 + b = 0.2065965798260176e+0 + v = 0.3959065270221274e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5429785044928199e+0 + b = 0.2489436378852235e+0 + v = 0.4015286975463570e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5835939850491711e+0 + b = 0.2904811368946891e+0 + v = 0.4050866785614717e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6216870353444856e+0 + b = 0.3307941957666609e+0 + v = 0.4069320185051913e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4151104662709091e+0 + b = 0.4064829146052554e-1 + v = 0.3760120964062763e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4649804275009218e+0 + b = 0.8258424547294755e-1 + v = 0.3870969564418064e-3 + 
grids.append(SphGenOh(5, a, b, v)) + a = 0.5124695757009662e+0 + b = 0.1251841962027289e+0 + v = 0.3955287790534055e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5574711100606224e+0 + b = 0.1679107505976331e+0 + v = 0.4015361911302668e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5998597333287227e+0 + b = 0.2102805057358715e+0 + v = 0.4053836986719548e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6395007148516600e+0 + b = 0.2518418087774107e+0 + v = 0.4073578673299117e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5188456224746252e+0 + b = 0.4194321676077518e-1 + v = 0.3954628379231406e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5664190707942778e+0 + b = 0.8457661551921499e-1 + v = 0.4017645508847530e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6110464353283153e+0 + b = 0.1273652932519396e+0 + v = 0.4059030348651293e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6526430302051563e+0 + b = 0.1698173239076354e+0 + v = 0.4080565809484880e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6167551880377548e+0 + b = 0.4266398851548864e-1 + v = 0.4063018753664651e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6607195418355383e+0 + b = 0.8551925814238349e-1 + v = 0.4087191292799671e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_3074(): + grids = [] + a = 0 + b = 0 + v = 0.2599095953754734e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.3603134089687541e-3 + grids.append(SphGenOh(1, a, b, v)) + v = 0.3586067974412447e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.1886108518723392e-1 + v = 0.9831528474385880e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4800217244625303e-1 + v = 0.1605023107954450e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.8244922058397242e-1 + v = 0.2072200131464099e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1200408362484023e+0 + v = 0.2431297618814187e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1595773530809965e+0 + v = 0.2711819064496707e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 
0.2002635973434064e+0 + v = 0.2932762038321116e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2415127590139982e+0 + v = 0.3107032514197368e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2828584158458477e+0 + v = 0.3243808058921213e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3239091015338138e+0 + v = 0.3349899091374030e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3643225097962194e+0 + v = 0.3430580688505218e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4037897083691802e+0 + v = 0.3490124109290343e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4420247515194127e+0 + v = 0.3532148948561955e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4787572538464938e+0 + v = 0.3559862669062833e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5137265251275234e+0 + v = 0.3576224317551411e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5466764056654611e+0 + v = 0.3584050533086076e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6054859420813535e+0 + v = 0.3584903581373224e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6308106701764562e+0 + v = 0.3582991879040586e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6530369230179584e+0 + v = 0.3582371187963125e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6718609524611158e+0 + v = 0.3584353631122350e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6869676499894013e+0 + v = 0.3589120166517785e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6980467077240748e+0 + v = 0.3595445704531601e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7048241721250522e+0 + v = 0.3600943557111074e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5591105222058232e-1 + v = 0.1456447096742039e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1407384078513916e+0 + v = 0.2252370188283782e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2364035438976309e+0 + v = 0.2766135443474897e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3360602737818170e+0 + v = 0.3110729491500851e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4356292630054665e+0 + v = 
0.3342506712303391e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5321569415256174e+0 + v = 0.3491981834026860e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6232956305040554e+0 + v = 0.3576003604348932e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.9469870086838469e-1 + b = 0.2778748387309470e-1 + v = 0.1921921305788564e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1353170300568141e+0 + b = 0.6076569878628364e-1 + v = 0.2301458216495632e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1771679481726077e+0 + b = 0.9703072762711040e-1 + v = 0.2604248549522893e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2197066664231751e+0 + b = 0.1354112458524762e+0 + v = 0.2845275425870697e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2624783557374927e+0 + b = 0.1750996479744100e+0 + v = 0.3036870897974840e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3050969521214442e+0 + b = 0.2154896907449802e+0 + v = 0.3188414832298066e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3472252637196021e+0 + b = 0.2560954625740152e+0 + v = 0.3307046414722089e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3885610219026360e+0 + b = 0.2965070050624096e+0 + v = 0.3398330969031360e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4288273776062765e+0 + b = 0.3363641488734497e+0 + v = 0.3466757899705373e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4677662471302948e+0 + b = 0.3753400029836788e+0 + v = 0.3516095923230054e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5051333589553359e+0 + b = 0.4131297522144286e+0 + v = 0.3549645184048486e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5406942145810492e+0 + b = 0.4494423776081795e+0 + v = 0.3570415969441392e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5742204122576457e+0 + b = 0.4839938958841502e+0 + v = 0.3581251798496118e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1865407027225188e+0 + b = 0.3259144851070796e-1 + v = 0.2543491329913348e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2321186453689432e+0 + b = 0.6835679505297343e-1 + v 
= 0.2786711051330776e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2773159142523882e+0 + b = 0.1062284864451989e+0 + v = 0.2985552361083679e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3219200192237254e+0 + b = 0.1454404409323047e+0 + v = 0.3145867929154039e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3657032593944029e+0 + b = 0.1854018282582510e+0 + v = 0.3273290662067609e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4084376778363622e+0 + b = 0.2256297412014750e+0 + v = 0.3372705511943501e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4499004945751427e+0 + b = 0.2657104425000896e+0 + v = 0.3448274437851510e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4898758141326335e+0 + b = 0.3052755487631557e+0 + v = 0.3503592783048583e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5281547442266309e+0 + b = 0.3439863920645423e+0 + v = 0.3541854792663162e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5645346989813992e+0 + b = 0.3815229456121914e+0 + v = 0.3565995517909428e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5988181252159848e+0 + b = 0.4175752420966734e+0 + v = 0.3578802078302898e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2850425424471603e+0 + b = 0.3562149509862536e-1 + v = 0.2958644592860982e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3324619433027876e+0 + b = 0.7330318886871096e-1 + v = 0.3119548129116835e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3785848333076282e+0 + b = 0.1123226296008472e+0 + v = 0.3250745225005984e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4232891028562115e+0 + b = 0.1521084193337708e+0 + v = 0.3355153415935208e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4664287050829722e+0 + b = 0.1921844459223610e+0 + v = 0.3435847568549328e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5078458493735726e+0 + b = 0.2321360989678303e+0 + v = 0.3495786831622488e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5473779816204180e+0 + b = 0.2715886486360520e+0 + v = 0.3537767805534621e-3 + grids.append(SphGenOh(5, a, b, v)) + 
a = 0.5848617133811376e+0 + b = 0.3101924707571355e+0 + v = 0.3564459815421428e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6201348281584888e+0 + b = 0.3476121052890973e+0 + v = 0.3578464061225468e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3852191185387871e+0 + b = 0.3763224880035108e-1 + v = 0.3239748762836212e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4325025061073423e+0 + b = 0.7659581935637135e-1 + v = 0.3345491784174287e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4778486229734490e+0 + b = 0.1163381306083900e+0 + v = 0.3429126177301782e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5211663693009000e+0 + b = 0.1563890598752899e+0 + v = 0.3492420343097421e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5623469504853703e+0 + b = 0.1963320810149200e+0 + v = 0.3537399050235257e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6012718188659246e+0 + b = 0.2357847407258738e+0 + v = 0.3566209152659172e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6378179206390117e+0 + b = 0.2743846121244060e+0 + v = 0.3581084321919782e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4836936460214534e+0 + b = 0.3895902610739024e-1 + v = 0.3426522117591512e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5293792562683797e+0 + b = 0.7871246819312640e-1 + v = 0.3491848770121379e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5726281253100033e+0 + b = 0.1187963808202981e+0 + v = 0.3539318235231476e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6133658776169068e+0 + b = 0.1587914708061787e+0 + v = 0.3570231438458694e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6515085491865307e+0 + b = 0.1983058575227646e+0 + v = 0.3586207335051714e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5778692716064976e+0 + b = 0.3977209689791542e-1 + v = 0.3541196205164025e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6207904288086192e+0 + b = 0.7990157592981152e-1 + v = 0.3574296911573953e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6608688171046802e+0 + b = 0.1199671308754309e+0 + v = 
0.3591993279818963e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6656263089489130e+0 + b = 0.4015955957805969e-1 + v = 0.3595855034661997e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_3470(): + grids = [] + a = 0 + b = 0 + v = 0.2040382730826330e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.3178149703889544e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.1721420832906233e-1 + v = 0.8288115128076110e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4408875374981770e-1 + v = 0.1360883192522954e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7594680813878681e-1 + v = 0.1766854454542662e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1108335359204799e+0 + v = 0.2083153161230153e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1476517054388567e+0 + v = 0.2333279544657158e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1856731870860615e+0 + v = 0.2532809539930247e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2243634099428821e+0 + v = 0.2692472184211158e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2633006881662727e+0 + v = 0.2819949946811885e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3021340904916283e+0 + v = 0.2920953593973030e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3405594048030089e+0 + v = 0.2999889782948352e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3783044434007372e+0 + v = 0.3060292120496902e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4151194767407910e+0 + v = 0.3105109167522192e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4507705766443257e+0 + v = 0.3136902387550312e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4850346056573187e+0 + v = 0.3157984652454632e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5176950817792470e+0 + v = 0.3170516518425422e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5485384240820989e+0 + v = 0.3176568425633755e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6039117238943308e+0 + v = 0.3177198411207062e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 
0.6279956655573113e+0 + v = 0.3175519492394733e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6493636169568952e+0 + v = 0.3174654952634756e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6677644117704504e+0 + v = 0.3175676415467654e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6829368572115624e+0 + v = 0.3178923417835410e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6946195818184121e+0 + v = 0.3183788287531909e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7025711542057026e+0 + v = 0.3188755151918807e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7066004767140119e+0 + v = 0.3191916889313849e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5132537689946062e-1 + v = 0.1231779611744508e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1297994661331225e+0 + v = 0.1924661373839880e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2188852049401307e+0 + v = 0.2380881867403424e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3123174824903457e+0 + v = 0.2693100663037885e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4064037620738195e+0 + v = 0.2908673382834366e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4984958396944782e+0 + v = 0.3053914619381535e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5864975046021365e+0 + v = 0.3143916684147777e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6686711634580175e+0 + v = 0.3187042244055363e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.8715738780835950e-1 + b = 0.2557175233367578e-1 + v = 0.1635219535869790e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1248383123134007e+0 + b = 0.5604823383376681e-1 + v = 0.1968109917696070e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1638062693383378e+0 + b = 0.8968568601900765e-1 + v = 0.2236754342249974e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2035586203373176e+0 + b = 0.1254086651976279e+0 + v = 0.2453186687017181e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2436798975293774e+0 + b = 0.1624780150162012e+0 + v = 0.2627551791580541e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 
0.2838207507773806e+0 + b = 0.2003422342683208e+0 + v = 0.2767654860152220e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3236787502217692e+0 + b = 0.2385628026255263e+0 + v = 0.2879467027765895e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3629849554840691e+0 + b = 0.2767731148783578e+0 + v = 0.2967639918918702e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4014948081992087e+0 + b = 0.3146542308245309e+0 + v = 0.3035900684660351e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4389818379260225e+0 + b = 0.3519196415895088e+0 + v = 0.3087338237298308e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4752331143674377e+0 + b = 0.3883050984023654e+0 + v = 0.3124608838860167e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5100457318374018e+0 + b = 0.4235613423908649e+0 + v = 0.3150084294226743e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5432238388954868e+0 + b = 0.4574484717196220e+0 + v = 0.3165958398598402e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5745758685072442e+0 + b = 0.4897311639255524e+0 + v = 0.3174320440957372e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1723981437592809e+0 + b = 0.3010630597881105e-1 + v = 0.2182188909812599e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2149553257844597e+0 + b = 0.6326031554204694e-1 + v = 0.2399727933921445e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2573256081247422e+0 + b = 0.9848566980258631e-1 + v = 0.2579796133514652e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2993163751238106e+0 + b = 0.1350835952384266e+0 + v = 0.2727114052623535e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3407238005148000e+0 + b = 0.1725184055442181e+0 + v = 0.2846327656281355e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3813454978483264e+0 + b = 0.2103559279730725e+0 + v = 0.2941491102051334e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4209848104423343e+0 + b = 0.2482278774554860e+0 + v = 0.3016049492136107e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4594519699996300e+0 + b = 0.2858099509982883e+0 + v = 
0.3072949726175648e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4965640166185930e+0 + b = 0.3228075659915428e+0 + v = 0.3114768142886460e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5321441655571562e+0 + b = 0.3589459907204151e+0 + v = 0.3143823673666223e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5660208438582166e+0 + b = 0.3939630088864310e+0 + v = 0.3162269764661535e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5980264315964364e+0 + b = 0.4276029922949089e+0 + v = 0.3172164663759821e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2644215852350733e+0 + b = 0.3300939429072552e-1 + v = 0.2554575398967435e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3090113743443063e+0 + b = 0.6803887650078501e-1 + v = 0.2701704069135677e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3525871079197808e+0 + b = 0.1044326136206709e+0 + v = 0.2823693413468940e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3950418005354029e+0 + b = 0.1416751597517679e+0 + v = 0.2922898463214289e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4362475663430163e+0 + b = 0.1793408610504821e+0 + v = 0.3001829062162428e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4760661812145854e+0 + b = 0.2170630750175722e+0 + v = 0.3062890864542953e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5143551042512103e+0 + b = 0.2545145157815807e+0 + v = 0.3108328279264746e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5509709026935597e+0 + b = 0.2913940101706601e+0 + v = 0.3140243146201245e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5857711030329428e+0 + b = 0.3274169910910705e+0 + v = 0.3160638030977130e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6186149917404392e+0 + b = 0.3623081329317265e+0 + v = 0.3171462882206275e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3586894569557064e+0 + b = 0.3497354386450040e-1 + v = 0.2812388416031796e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4035266610019441e+0 + b = 0.7129736739757095e-1 + v = 0.2912137500288045e-3 + grids.append(SphGenOh(5, a, b, v)) + a 
= 0.4467775312332510e+0 + b = 0.1084758620193165e+0 + v = 0.2993241256502206e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4883638346608543e+0 + b = 0.1460915689241772e+0 + v = 0.3057101738983822e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5281908348434601e+0 + b = 0.1837790832369980e+0 + v = 0.3105319326251432e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5661542687149311e+0 + b = 0.2212075390874021e+0 + v = 0.3139565514428167e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6021450102031452e+0 + b = 0.2580682841160985e+0 + v = 0.3161543006806366e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6360520783610050e+0 + b = 0.2940656362094121e+0 + v = 0.3172985960613294e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4521611065087196e+0 + b = 0.3631055365867002e-1 + v = 0.2989400336901431e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4959365651560963e+0 + b = 0.7348318468484350e-1 + v = 0.3054555883947677e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5376815804038283e+0 + b = 0.1111087643812648e+0 + v = 0.3104764960807702e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5773314480243768e+0 + b = 0.1488226085145408e+0 + v = 0.3141015825977616e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6148113245575056e+0 + b = 0.1862892274135151e+0 + v = 0.3164520621159896e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6500407462842380e+0 + b = 0.2231909701714456e+0 + v = 0.3176652305912204e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5425151448707213e+0 + b = 0.3718201306118944e-1 + v = 0.3105097161023939e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5841860556907931e+0 + b = 0.7483616335067346e-1 + v = 0.3143014117890550e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6234632186851500e+0 + b = 0.1125990834266120e+0 + v = 0.3168172866287200e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6602934551848843e+0 + b = 0.1501303813157619e+0 + v = 0.3181401865570968e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6278573968375105e+0 + b = 0.3767559930245720e-1 + v = 
0.3170663659156037e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6665611711264577e+0 + b = 0.7548443301360158e-1 + v = 0.3185447944625510e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_3890(): + grids = [] + a = 0 + b = 0 + v = 0.1807395252196920e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.2848008782238827e-3 + grids.append(SphGenOh(1, a, b, v)) + v = 0.2836065837530581e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.1587876419858352e-1 + v = 0.7013149266673816e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4069193593751206e-1 + v = 0.1162798021956766e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7025888115257997e-1 + v = 0.1518728583972105e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1027495450028704e+0 + v = 0.1798796108216934e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1371457730893426e+0 + v = 0.2022593385972785e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1727758532671953e+0 + v = 0.2203093105575464e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2091492038929037e+0 + v = 0.2349294234299855e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2458813281751915e+0 + v = 0.2467682058747003e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2826545859450066e+0 + v = 0.2563092683572224e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3191957291799622e+0 + v = 0.2639253896763318e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3552621469299578e+0 + v = 0.2699137479265108e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3906329503406230e+0 + v = 0.2745196420166739e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4251028614093031e+0 + v = 0.2779529197397593e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4584777520111870e+0 + v = 0.2803996086684265e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4905711358710193e+0 + v = 0.2820302356715842e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5212011669847385e+0 + v = 0.2830056747491068e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5501878488737995e+0 + v = 
0.2834808950776839e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6025037877479342e+0 + v = 0.2835282339078929e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6254572689549016e+0 + v = 0.2833819267065800e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6460107179528248e+0 + v = 0.2832858336906784e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6639541138154251e+0 + v = 0.2833268235451244e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6790688515667495e+0 + v = 0.2835432677029253e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6911338580371512e+0 + v = 0.2839091722743049e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6999385956126490e+0 + v = 0.2843308178875841e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7053037748656896e+0 + v = 0.2846703550533846e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4732224387180115e-1 + v = 0.1051193406971900e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1202100529326803e+0 + v = 0.1657871838796974e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2034304820664855e+0 + v = 0.2064648113714232e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2912285643573002e+0 + v = 0.2347942745819741e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3802361792726768e+0 + v = 0.2547775326597726e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4680598511056146e+0 + v = 0.2686876684847025e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5528151052155599e+0 + v = 0.2778665755515867e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6329386307803041e+0 + v = 0.2830996616782929e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.8056516651369069e-1 + b = 0.2363454684003124e-1 + v = 0.1403063340168372e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1156476077139389e+0 + b = 0.5191291632545936e-1 + v = 0.1696504125939477e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1520473382760421e+0 + b = 0.8322715736994519e-1 + v = 0.1935787242745390e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1892986699745931e+0 + b = 0.1165855667993712e+0 + v = 0.2130614510521968e-3 + 
grids.append(SphGenOh(5, a, b, v)) + a = 0.2270194446777792e+0 + b = 0.1513077167409504e+0 + v = 0.2289381265931048e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2648908185093273e+0 + b = 0.1868882025807859e+0 + v = 0.2418630292816186e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3026389259574136e+0 + b = 0.2229277629776224e+0 + v = 0.2523400495631193e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3400220296151384e+0 + b = 0.2590951840746235e+0 + v = 0.2607623973449605e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3768217953335510e+0 + b = 0.2951047291750847e+0 + v = 0.2674441032689209e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4128372900921884e+0 + b = 0.3307019714169930e+0 + v = 0.2726432360343356e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4478807131815630e+0 + b = 0.3656544101087634e+0 + v = 0.2765787685924545e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4817742034089257e+0 + b = 0.3997448951939695e+0 + v = 0.2794428690642224e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5143472814653344e+0 + b = 0.4327667110812024e+0 + v = 0.2814099002062895e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5454346213905650e+0 + b = 0.4645196123532293e+0 + v = 0.2826429531578994e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5748739313170252e+0 + b = 0.4948063555703345e+0 + v = 0.2832983542550884e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1599598738286342e+0 + b = 0.2792357590048985e-1 + v = 0.1886695565284976e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1998097412500951e+0 + b = 0.5877141038139065e-1 + v = 0.2081867882748234e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2396228952566202e+0 + b = 0.9164573914691377e-1 + v = 0.2245148680600796e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2792228341097746e+0 + b = 0.1259049641962687e+0 + v = 0.2380370491511872e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3184251107546741e+0 + b = 0.1610594823400863e+0 + v = 0.2491398041852455e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3570481164426244e+0 
+ b = 0.1967151653460898e+0 + v = 0.2581632405881230e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3949164710492144e+0 + b = 0.2325404606175168e+0 + v = 0.2653965506227417e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4318617293970503e+0 + b = 0.2682461141151439e+0 + v = 0.2710857216747087e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4677221009931678e+0 + b = 0.3035720116011973e+0 + v = 0.2754434093903659e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5023417939270955e+0 + b = 0.3382781859197439e+0 + v = 0.2786579932519380e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5355701836636128e+0 + b = 0.3721383065625942e+0 + v = 0.2809011080679474e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5672608451328771e+0 + b = 0.4049346360466055e+0 + v = 0.2823336184560987e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5972704202540162e+0 + b = 0.4364538098633802e+0 + v = 0.2831101175806309e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2461687022333596e+0 + b = 0.3070423166833368e-1 + v = 0.2221679970354546e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2881774566286831e+0 + b = 0.6338034669281885e-1 + v = 0.2356185734270703e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3293963604116978e+0 + b = 0.9742862487067941e-1 + v = 0.2469228344805590e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3697303822241377e+0 + b = 0.1323799532282290e+0 + v = 0.2562726348642046e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4090663023135127e+0 + b = 0.1678497018129336e+0 + v = 0.2638756726753028e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4472819355411712e+0 + b = 0.2035095105326114e+0 + v = 0.2699311157390862e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4842513377231437e+0 + b = 0.2390692566672091e+0 + v = 0.2746233268403837e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5198477629962928e+0 + b = 0.2742649818076149e+0 + v = 0.2781225674454771e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5539453011883145e+0 + b = 0.3088503806580094e+0 + v = 0.2805881254045684e-3 + 
grids.append(SphGenOh(5, a, b, v)) + a = 0.5864196762401251e+0 + b = 0.3425904245906614e+0 + v = 0.2821719877004913e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6171484466668390e+0 + b = 0.3752562294789468e+0 + v = 0.2830222502333124e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3350337830565727e+0 + b = 0.3261589934634747e-1 + v = 0.2457995956744870e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3775773224758284e+0 + b = 0.6658438928081572e-1 + v = 0.2551474407503706e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4188155229848973e+0 + b = 0.1014565797157954e+0 + v = 0.2629065335195311e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4586805892009344e+0 + b = 0.1368573320843822e+0 + v = 0.2691900449925075e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4970895714224235e+0 + b = 0.1724614851951608e+0 + v = 0.2741275485754276e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5339505133960747e+0 + b = 0.2079779381416412e+0 + v = 0.2778530970122595e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5691665792531440e+0 + b = 0.2431385788322288e+0 + v = 0.2805010567646741e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6026387682680377e+0 + b = 0.2776901883049853e+0 + v = 0.2822055834031040e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6342676150163307e+0 + b = 0.3113881356386632e+0 + v = 0.2831016901243473e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4237951119537067e+0 + b = 0.3394877848664351e-1 + v = 0.2624474901131803e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4656918683234929e+0 + b = 0.6880219556291447e-1 + v = 0.2688034163039377e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5058857069185980e+0 + b = 0.1041946859721635e+0 + v = 0.2738932751287636e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5443204666713996e+0 + b = 0.1398039738736393e+0 + v = 0.2777944791242523e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5809298813759742e+0 + b = 0.1753373381196155e+0 + v = 0.2806011661660987e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6156416039447128e+0 
+ b = 0.2105215793514010e+0 + v = 0.2824181456597460e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6483801351066604e+0 + b = 0.2450953312157051e+0 + v = 0.2833585216577828e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5103616577251688e+0 + b = 0.3485560643800719e-1 + v = 0.2738165236962878e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5506738792580681e+0 + b = 0.7026308631512033e-1 + v = 0.2778365208203180e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5889573040995292e+0 + b = 0.1059035061296403e+0 + v = 0.2807852940418966e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6251641589516930e+0 + b = 0.1414823925236026e+0 + v = 0.2827245949674705e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6592414921570178e+0 + b = 0.1767207908214530e+0 + v = 0.2837342344829828e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5930314017533384e+0 + b = 0.3542189339561672e-1 + v = 0.2809233907610981e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6309812253390175e+0 + b = 0.7109574040369549e-1 + v = 0.2829930809742694e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6666296011353230e+0 + b = 0.1067259792282730e+0 + v = 0.2841097874111479e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6703715271049922e+0 + b = 0.3569455268820809e-1 + v = 0.2843455206008783e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_4334(): + grids = [] + a = 0 + b = 0 + v = 0.1449063022537883e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.2546377329828424e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.1462896151831013e-1 + v = 0.6018432961087496e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3769840812493139e-1 + v = 0.1002286583263673e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6524701904096891e-1 + v = 0.1315222931028093e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.9560543416134648e-1 + v = 0.1564213746876724e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1278335898929198e+0 + v = 0.1765118841507736e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 
0.1613096104466031e+0 + v = 0.1928737099311080e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1955806225745371e+0 + v = 0.2062658534263270e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2302935218498028e+0 + v = 0.2172395445953787e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2651584344113027e+0 + v = 0.2262076188876047e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2999276825183209e+0 + v = 0.2334885699462397e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3343828669718798e+0 + v = 0.2393355273179203e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3683265013750518e+0 + v = 0.2439559200468863e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4015763206518108e+0 + v = 0.2475251866060002e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4339612026399770e+0 + v = 0.2501965558158773e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4653180651114582e+0 + v = 0.2521081407925925e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4954893331080803e+0 + v = 0.2533881002388081e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5243207068924930e+0 + v = 0.2541582900848261e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5516590479041704e+0 + v = 0.2545365737525860e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6012371927804176e+0 + v = 0.2545726993066799e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6231574466449819e+0 + v = 0.2544456197465555e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6429416514181271e+0 + v = 0.2543481596881064e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6604124272943595e+0 + v = 0.2543506451429194e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6753851470408250e+0 + v = 0.2544905675493763e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6876717970626160e+0 + v = 0.2547611407344429e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6970895061319234e+0 + v = 0.2551060375448869e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7034746912553310e+0 + v = 0.2554291933816039e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7067017217542295e+0 + v = 
0.2556255710686343e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4382223501131123e-1 + v = 0.9041339695118195e-4 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1117474077400006e+0 + v = 0.1438426330079022e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1897153252911440e+0 + v = 0.1802523089820518e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2724023009910331e+0 + v = 0.2060052290565496e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3567163308709902e+0 + v = 0.2245002248967466e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4404784483028087e+0 + v = 0.2377059847731150e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5219833154161411e+0 + v = 0.2468118955882525e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5998179868977553e+0 + v = 0.2525410872966528e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6727803154548222e+0 + v = 0.2553101409933397e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.7476563943166086e-1 + b = 0.2193168509461185e-1 + v = 0.1212879733668632e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1075341482001416e+0 + b = 0.4826419281533887e-1 + v = 0.1472872881270931e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1416344885203259e+0 + b = 0.7751191883575742e-1 + v = 0.1686846601010828e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1766325315388586e+0 + b = 0.1087558139247680e+0 + v = 0.1862698414660208e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2121744174481514e+0 + b = 0.1413661374253096e+0 + v = 0.2007430956991861e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2479669443408145e+0 + b = 0.1748768214258880e+0 + v = 0.2126568125394796e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2837600452294113e+0 + b = 0.2089216406612073e+0 + v = 0.2224394603372113e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3193344933193984e+0 + b = 0.2431987685545972e+0 + v = 0.2304264522673135e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3544935442438745e+0 + b = 0.2774497054377770e+0 + v = 0.2368854288424087e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 
0.3890571932288154e+0 + b = 0.3114460356156915e+0 + v = 0.2420352089461772e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4228581214259090e+0 + b = 0.3449806851913012e+0 + v = 0.2460597113081295e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4557387211304052e+0 + b = 0.3778618641248256e+0 + v = 0.2491181912257687e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4875487950541643e+0 + b = 0.4099086391698978e+0 + v = 0.2513528194205857e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5181436529962997e+0 + b = 0.4409474925853973e+0 + v = 0.2528943096693220e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5473824095600661e+0 + b = 0.4708094517711291e+0 + v = 0.2538660368488136e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5751263398976174e+0 + b = 0.4993275140354637e+0 + v = 0.2543868648299022e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1489515746840028e+0 + b = 0.2599381993267017e-1 + v = 0.1642595537825183e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1863656444351767e+0 + b = 0.5479286532462190e-1 + v = 0.1818246659849308e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2238602880356348e+0 + b = 0.8556763251425254e-1 + v = 0.1966565649492420e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2612723375728160e+0 + b = 0.1177257802267011e+0 + v = 0.2090677905657991e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2984332990206190e+0 + b = 0.1508168456192700e+0 + v = 0.2193820409510504e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3351786584663333e+0 + b = 0.1844801892177727e+0 + v = 0.2278870827661928e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3713505522209120e+0 + b = 0.2184145236087598e+0 + v = 0.2348283192282090e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4067981098954663e+0 + b = 0.2523590641486229e+0 + v = 0.2404139755581477e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4413769993687534e+0 + b = 0.2860812976901373e+0 + v = 0.2448227407760734e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4749487182516394e+0 + b = 0.3193686757808996e+0 + v = 
0.2482110455592573e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5073798105075426e+0 + b = 0.3520226949547602e+0 + v = 0.2507192397774103e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5385410448878654e+0 + b = 0.3838544395667890e+0 + v = 0.2524765968534880e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5683065353670530e+0 + b = 0.4146810037640963e+0 + v = 0.2536052388539425e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5965527620663510e+0 + b = 0.4443224094681121e+0 + v = 0.2542230588033068e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2299227700856157e+0 + b = 0.2865757664057584e-1 + v = 0.1944817013047896e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2695752998553267e+0 + b = 0.5923421684485993e-1 + v = 0.2067862362746635e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3086178716611389e+0 + b = 0.9117817776057715e-1 + v = 0.2172440734649114e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3469649871659077e+0 + b = 0.1240593814082605e+0 + v = 0.2260125991723423e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3845153566319655e+0 + b = 0.1575272058259175e+0 + v = 0.2332655008689523e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4211600033403215e+0 + b = 0.1912845163525413e+0 + v = 0.2391699681532458e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4567867834329882e+0 + b = 0.2250710177858171e+0 + v = 0.2438801528273928e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4912829319232061e+0 + b = 0.2586521303440910e+0 + v = 0.2475370504260665e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5245364793303812e+0 + b = 0.2918112242865407e+0 + v = 0.2502707235640574e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5564369788915756e+0 + b = 0.3243439239067890e+0 + v = 0.2522031701054241e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5868757697775287e+0 + b = 0.3560536787835351e+0 + v = 0.2534511269978784e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6157458853519617e+0 + b = 0.3867480821242581e+0 + v = 0.2541284914955151e-3 + grids.append(SphGenOh(5, a, b, v)) + a 
= 0.3138461110672113e+0 + b = 0.3051374637507278e-1 + v = 0.2161509250688394e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3542495872050569e+0 + b = 0.6237111233730755e-1 + v = 0.2248778513437852e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3935751553120181e+0 + b = 0.9516223952401907e-1 + v = 0.2322388803404617e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4317634668111147e+0 + b = 0.1285467341508517e+0 + v = 0.2383265471001355e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4687413842250821e+0 + b = 0.1622318931656033e+0 + v = 0.2432476675019525e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5044274237060283e+0 + b = 0.1959581153836453e+0 + v = 0.2471122223750674e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5387354077925727e+0 + b = 0.2294888081183837e+0 + v = 0.2500291752486870e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5715768898356105e+0 + b = 0.2626031152713945e+0 + v = 0.2521055942764682e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6028627200136111e+0 + b = 0.2950904075286713e+0 + v = 0.2534472785575503e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6325039812653463e+0 + b = 0.3267458451113286e+0 + v = 0.2541599713080121e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3981986708423407e+0 + b = 0.3183291458749821e-1 + v = 0.2317380975862936e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4382791182133300e+0 + b = 0.6459548193880908e-1 + v = 0.2378550733719775e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4769233057218166e+0 + b = 0.9795757037087952e-1 + v = 0.2428884456739118e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5140823911194238e+0 + b = 0.1316307235126655e+0 + v = 0.2469002655757292e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5496977833862983e+0 + b = 0.1653556486358704e+0 + v = 0.2499657574265851e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5837047306512727e+0 + b = 0.1988931724126510e+0 + v = 0.2521676168486082e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6160349566926879e+0 + b = 0.2320174581438950e+0 + v = 
0.2535935662645334e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6466185353209440e+0 + b = 0.2645106562168662e+0 + v = 0.2543356743363214e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4810835158795404e+0 + b = 0.3275917807743992e-1 + v = 0.2427353285201535e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5199925041324341e+0 + b = 0.6612546183967181e-1 + v = 0.2468258039744386e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5571717692207494e+0 + b = 0.9981498331474143e-1 + v = 0.2500060956440310e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5925789250836378e+0 + b = 0.1335687001410374e+0 + v = 0.2523238365420979e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6261658523859670e+0 + b = 0.1671444402896463e+0 + v = 0.2538399260252846e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6578811126669331e+0 + b = 0.2003106382156076e+0 + v = 0.2546255927268069e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5609624612998100e+0 + b = 0.3337500940231335e-1 + v = 0.2500583360048449e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5979959659984670e+0 + b = 0.6708750335901803e-1 + v = 0.2524777638260203e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6330523711054002e+0 + b = 0.1008792126424850e+0 + v = 0.2540951193860656e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6660960998103972e+0 + b = 0.1345050343171794e+0 + v = 0.2549524085027472e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6365384364585819e+0 + b = 0.3372799460737052e-1 + v = 0.2542569507009158e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6710994302899275e+0 + b = 0.6755249309678028e-1 + v = 0.2552114127580376e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_4802(): + grids = [] + a = 0 + b = 0 + v = 0.9687521879420705e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.2307897895367918e-3 + grids.append(SphGenOh(1, a, b, v)) + v = 0.2297310852498558e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.2335728608887064e-1 + v = 0.7386265944001919e-4 + grids.append(SphGenOh(3, 
a, b, v)) + a = 0.4352987836550653e-1 + v = 0.8257977698542210e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6439200521088801e-1 + v = 0.9706044762057630e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.9003943631993181e-1 + v = 0.1302393847117003e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1196706615548473e+0 + v = 0.1541957004600968e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1511715412838134e+0 + v = 0.1704459770092199e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1835982828503801e+0 + v = 0.1827374890942906e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2165081259155405e+0 + v = 0.1926360817436107e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2496208720417563e+0 + v = 0.2008010239494833e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2827200673567900e+0 + v = 0.2075635983209175e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3156190823994346e+0 + v = 0.2131306638690909e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3481476793749115e+0 + v = 0.2176562329937335e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3801466086947226e+0 + v = 0.2212682262991018e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4114652119634011e+0 + v = 0.2240799515668565e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4419598786519751e+0 + v = 0.2261959816187525e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4714925949329543e+0 + v = 0.2277156368808855e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4999293972879466e+0 + v = 0.2287351772128336e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5271387221431248e+0 + v = 0.2293490814084085e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5529896780837761e+0 + v = 0.2296505312376273e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6000856099481712e+0 + v = 0.2296793832318756e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6210562192785175e+0 + v = 0.2295785443842974e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6401165879934240e+0 + v = 0.2295017931529102e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6571144029244334e+0 + v = 
0.2295059638184868e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6718910821718863e+0 + v = 0.2296232343237362e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6842845591099010e+0 + v = 0.2298530178740771e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6941353476269816e+0 + v = 0.2301579790280501e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7012965242212991e+0 + v = 0.2304690404996513e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7056471428242644e+0 + v = 0.2307027995907102e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4595557643585895e-1 + v = 0.9312274696671092e-4 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1049316742435023e+0 + v = 0.1199919385876926e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1773548879549274e+0 + v = 0.1598039138877690e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2559071411236127e+0 + v = 0.1822253763574900e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3358156837985898e+0 + v = 0.1988579593655040e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4155835743763893e+0 + v = 0.2112620102533307e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4937894296167472e+0 + v = 0.2201594887699007e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5691569694793316e+0 + v = 0.2261622590895036e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6405840854894251e+0 + v = 0.2296458453435705e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.7345133894143348e-1 + b = 0.2177844081486067e-1 + v = 0.1006006990267000e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1009859834044931e+0 + b = 0.4590362185775188e-1 + v = 0.1227676689635876e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1324289619748758e+0 + b = 0.7255063095690877e-1 + v = 0.1467864280270117e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1654272109607127e+0 + b = 0.1017825451960684e+0 + v = 0.1644178912101232e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1990767186776461e+0 + b = 0.1325652320980364e+0 + v = 0.1777664890718961e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2330125945523278e+0 + b = 
0.1642765374496765e+0 + v = 0.1884825664516690e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2670080611108287e+0 + b = 0.1965360374337889e+0 + v = 0.1973269246453848e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3008753376294316e+0 + b = 0.2290726770542238e+0 + v = 0.2046767775855328e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3344475596167860e+0 + b = 0.2616645495370823e+0 + v = 0.2107600125918040e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3675709724070786e+0 + b = 0.2941150728843141e+0 + v = 0.2157416362266829e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4001000887587812e+0 + b = 0.3262440400919066e+0 + v = 0.2197557816920721e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4318956350436028e+0 + b = 0.3578835350611916e+0 + v = 0.2229192611835437e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4628239056795531e+0 + b = 0.3888751854043678e+0 + v = 0.2253385110212775e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4927563229773636e+0 + b = 0.4190678003222840e+0 + v = 0.2271137107548774e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5215687136707969e+0 + b = 0.4483151836883852e+0 + v = 0.2283414092917525e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5491402346984905e+0 + b = 0.4764740676087880e+0 + v = 0.2291161673130077e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5753520160126075e+0 + b = 0.5034021310998277e+0 + v = 0.2295313908576598e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1388326356417754e+0 + b = 0.2435436510372806e-1 + v = 0.1438204721359031e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1743686900537244e+0 + b = 0.5118897057342652e-1 + v = 0.1607738025495257e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2099737037950268e+0 + b = 0.8014695048539634e-1 + v = 0.1741483853528379e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2454492590908548e+0 + b = 0.1105117874155699e+0 + v = 0.1851918467519151e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2807219257864278e+0 + b = 0.1417950531570966e+0 + v = 0.1944628638070613e-3 + 
grids.append(SphGenOh(5, a, b, v)) + a = 0.3156842271975842e+0 + b = 0.1736604945719597e+0 + v = 0.2022495446275152e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3502090945177752e+0 + b = 0.2058466324693981e+0 + v = 0.2087462382438514e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3841684849519686e+0 + b = 0.2381284261195919e+0 + v = 0.2141074754818308e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4174372367906016e+0 + b = 0.2703031270422569e+0 + v = 0.2184640913748162e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4498926465011892e+0 + b = 0.3021845683091309e+0 + v = 0.2219309165220329e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4814146229807701e+0 + b = 0.3335993355165720e+0 + v = 0.2246123118340624e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5118863625734701e+0 + b = 0.3643833735518232e+0 + v = 0.2266062766915125e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5411947455119144e+0 + b = 0.3943789541958179e+0 + v = 0.2280072952230796e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5692301500357246e+0 + b = 0.4234320144403542e+0 + v = 0.2289082025202583e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5958857204139576e+0 + b = 0.4513897947419260e+0 + v = 0.2294012695120025e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2156270284785766e+0 + b = 0.2681225755444491e-1 + v = 0.1722434488736947e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2532385054909710e+0 + b = 0.5557495747805614e-1 + v = 0.1830237421455091e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2902564617771537e+0 + b = 0.8569368062950249e-1 + v = 0.1923855349997633e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3266979823143256e+0 + b = 0.1167367450324135e+0 + v = 0.2004067861936271e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3625039627493614e+0 + b = 0.1483861994003304e+0 + v = 0.2071817297354263e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3975838937548699e+0 + b = 0.1803821503011405e+0 + v = 0.2128250834102103e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4318396099009774e+0 
+ b = 0.2124962965666424e+0 + v = 0.2174513719440102e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4651706555732742e+0 + b = 0.2445221837805913e+0 + v = 0.2211661839150214e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4974752649620969e+0 + b = 0.2762701224322987e+0 + v = 0.2240665257813102e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5286517579627517e+0 + b = 0.3075627775211328e+0 + v = 0.2262439516632620e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5586001195731895e+0 + b = 0.3382311089826877e+0 + v = 0.2277874557231869e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5872229902021319e+0 + b = 0.3681108834741399e+0 + v = 0.2287854314454994e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6144258616235123e+0 + b = 0.3970397446872839e+0 + v = 0.2293268499615575e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2951676508064861e+0 + b = 0.2867499538750441e-1 + v = 0.1912628201529828e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3335085485472725e+0 + b = 0.5867879341903510e-1 + v = 0.1992499672238701e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3709561760636381e+0 + b = 0.8961099205022284e-1 + v = 0.2061275533454027e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4074722861667498e+0 + b = 0.1211627927626297e+0 + v = 0.2119318215968572e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4429923648839117e+0 + b = 0.1530748903554898e+0 + v = 0.2167416581882652e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4774428052721736e+0 + b = 0.1851176436721877e+0 + v = 0.2206430730516600e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5107446539535904e+0 + b = 0.2170829107658179e+0 + v = 0.2237186938699523e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5428151370542935e+0 + b = 0.2487786689026271e+0 + v = 0.2260480075032884e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5735699292556964e+0 + b = 0.2800239952795016e+0 + v = 0.2277098884558542e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6029253794562866e+0 + b = 0.3106445702878119e+0 + v = 0.2287845715109671e-3 + 
grids.append(SphGenOh(5, a, b, v)) + a = 0.6307998987073145e+0 + b = 0.3404689500841194e+0 + v = 0.2293547268236294e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3752652273692719e+0 + b = 0.2997145098184479e-1 + v = 0.2056073839852528e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4135383879344028e+0 + b = 0.6086725898678011e-1 + v = 0.2114235865831876e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4506113885153907e+0 + b = 0.9238849548435643e-1 + v = 0.2163175629770551e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4864401554606072e+0 + b = 0.1242786603851851e+0 + v = 0.2203392158111650e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5209708076611709e+0 + b = 0.1563086731483386e+0 + v = 0.2235473176847839e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5541422135830122e+0 + b = 0.1882696509388506e+0 + v = 0.2260024141501235e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5858880915113817e+0 + b = 0.2199672979126059e+0 + v = 0.2277675929329182e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6161399390603444e+0 + b = 0.2512165482924867e+0 + v = 0.2289102112284834e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6448296482255090e+0 + b = 0.2818368701871888e+0 + v = 0.2295027954625118e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4544796274917948e+0 + b = 0.3088970405060312e-1 + v = 0.2161281589879992e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4919389072146628e+0 + b = 0.6240947677636835e-1 + v = 0.2201980477395102e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5279313026985183e+0 + b = 0.9430706144280313e-1 + v = 0.2234952066593166e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5624169925571135e+0 + b = 0.1263547818770374e+0 + v = 0.2260540098520838e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5953484627093287e+0 + b = 0.1583430788822594e+0 + v = 0.2279157981899988e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6266730715339185e+0 + b = 0.1900748462555988e+0 + v = 0.2291296918565571e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6563363204278871e+0 
+ b = 0.2213599519592567e+0 + v = 0.2297533752536649e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5314574716585696e+0 + b = 0.3152508811515374e-1 + v = 0.2234927356465995e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5674614932298185e+0 + b = 0.6343865291465561e-1 + v = 0.2261288012985219e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6017706004970264e+0 + b = 0.9551503504223951e-1 + v = 0.2280818160923688e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6343471270264178e+0 + b = 0.1275440099801196e+0 + v = 0.2293773295180159e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6651494599127802e+0 + b = 0.1593252037671960e+0 + v = 0.2300528767338634e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6050184986005704e+0 + b = 0.3192538338496105e-1 + v = 0.2281893855065666e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6390163550880400e+0 + b = 0.6402824353962306e-1 + v = 0.2295720444840727e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6711199107088448e+0 + b = 0.9609805077002909e-1 + v = 0.2303227649026753e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6741354429572275e+0 + b = 0.3211853196273233e-1 + v = 0.2304831913227114e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_5294(): + grids = [] + a = 0 + b = 0 + v = 0.9080510764308163e-4 + grids.append(SphGenOh(0, a, b, v)) + v = 0.2084824361987793e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.2303261686261450e-1 + v = 0.5011105657239616e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3757208620162394e-1 + v = 0.5942520409683854e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5821912033821852e-1 + v = 0.9564394826109721e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.8403127529194872e-1 + v = 0.1185530657126338e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1122927798060578e+0 + v = 0.1364510114230331e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1420125319192987e+0 + v = 0.1505828825605415e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1726396437341978e+0 + v = 
0.1619298749867023e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2038170058115696e+0 + v = 0.1712450504267789e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2352849892876508e+0 + v = 0.1789891098164999e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2668363354312461e+0 + v = 0.1854474955629795e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2982941279900452e+0 + v = 0.1908148636673661e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3295002922087076e+0 + v = 0.1952377405281833e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3603094918363593e+0 + v = 0.1988349254282232e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3905857895173920e+0 + v = 0.2017079807160050e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4202005758160837e+0 + v = 0.2039473082709094e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4490310061597227e+0 + v = 0.2056360279288953e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4769586160311491e+0 + v = 0.2068525823066865e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5038679887049750e+0 + v = 0.2076724877534488e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5296454286519961e+0 + v = 0.2081694278237885e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5541776207164850e+0 + v = 0.2084157631219326e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5990467321921213e+0 + v = 0.2084381531128593e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6191467096294587e+0 + v = 0.2083476277129307e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6375251212901849e+0 + v = 0.2082686194459732e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6540514381131168e+0 + v = 0.2082475686112415e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6685899064391510e+0 + v = 0.2083139860289915e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6810013009681648e+0 + v = 0.2084745561831237e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6911469578730340e+0 + v = 0.2087091313375890e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6988956915141736e+0 + v = 0.2089718413297697e-3 + 
grids.append(SphGenOh(3, a, b, v)) + a = 0.7041335794868720e+0 + v = 0.2092003303479793e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7067754398018567e+0 + v = 0.2093336148263241e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3840368707853623e-1 + v = 0.7591708117365267e-4 + grids.append(SphGenOh(4, a, b, v)) + a = 0.9835485954117399e-1 + v = 0.1083383968169186e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1665774947612998e+0 + v = 0.1403019395292510e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2405702335362910e+0 + v = 0.1615970179286436e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3165270770189046e+0 + v = 0.1771144187504911e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3927386145645443e+0 + v = 0.1887760022988168e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4678825918374656e+0 + v = 0.1973474670768214e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5408022024266935e+0 + v = 0.2033787661234659e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6104967445752438e+0 + v = 0.2072343626517331e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6760910702685738e+0 + v = 0.2091177834226918e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6655644120217392e-1 + b = 0.1936508874588424e-1 + v = 0.9316684484675566e-4 + grids.append(SphGenOh(5, a, b, v)) + a = 0.9446246161270182e-1 + b = 0.4252442002115869e-1 + v = 0.1116193688682976e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1242651925452509e+0 + b = 0.6806529315354374e-1 + v = 0.1298623551559414e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1553438064846751e+0 + b = 0.9560957491205369e-1 + v = 0.1450236832456426e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1871137110542670e+0 + b = 0.1245931657452888e+0 + v = 0.1572719958149914e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2192612628836257e+0 + b = 0.1545385828778978e+0 + v = 0.1673234785867195e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2515682807206955e+0 + b = 0.1851004249723368e+0 + v = 0.1756860118725188e-3 + grids.append(SphGenOh(5, a, b, v)) + 
a = 0.2838535866287290e+0 + b = 0.2160182608272384e+0 + v = 0.1826776290439367e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3159578817528521e+0 + b = 0.2470799012277111e+0 + v = 0.1885116347992865e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3477370882791392e+0 + b = 0.2781014208986402e+0 + v = 0.1933457860170574e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3790576960890540e+0 + b = 0.3089172523515731e+0 + v = 0.1973060671902064e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4097938317810200e+0 + b = 0.3393750055472244e+0 + v = 0.2004987099616311e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4398256572859637e+0 + b = 0.3693322470987730e+0 + v = 0.2030170909281499e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4690384114718480e+0 + b = 0.3986541005609877e+0 + v = 0.2049461460119080e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4973216048301053e+0 + b = 0.4272112491408562e+0 + v = 0.2063653565200186e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5245681526132446e+0 + b = 0.4548781735309936e+0 + v = 0.2073507927381027e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5506733911803888e+0 + b = 0.4815315355023251e+0 + v = 0.2079764593256122e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5755339829522475e+0 + b = 0.5070486445801855e+0 + v = 0.2083150534968778e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1305472386056362e+0 + b = 0.2284970375722366e-1 + v = 0.1262715121590664e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1637327908216477e+0 + b = 0.4812254338288384e-1 + v = 0.1414386128545972e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1972734634149637e+0 + b = 0.7531734457511935e-1 + v = 0.1538740401313898e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2308694653110130e+0 + b = 0.1039043639882017e+0 + v = 0.1642434942331432e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2643899218338160e+0 + b = 0.1334526587117626e+0 + v = 0.1729790609237496e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2977171599622171e+0 + b = 0.1636414868936382e+0 + v = 
0.1803505190260828e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3307293903032310e+0 + b = 0.1942195406166568e+0 + v = 0.1865475350079657e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3633069198219073e+0 + b = 0.2249752879943753e+0 + v = 0.1917182669679069e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3953346955922727e+0 + b = 0.2557218821820032e+0 + v = 0.1959851709034382e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4267018394184914e+0 + b = 0.2862897925213193e+0 + v = 0.1994529548117882e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4573009622571704e+0 + b = 0.3165224536636518e+0 + v = 0.2022138911146548e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4870279559856109e+0 + b = 0.3462730221636496e+0 + v = 0.2043518024208592e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5157819581450322e+0 + b = 0.3754016870282835e+0 + v = 0.2059450313018110e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5434651666465393e+0 + b = 0.4037733784993613e+0 + v = 0.2070685715318472e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5699823887764627e+0 + b = 0.4312557784139123e+0 + v = 0.2077955310694373e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5952403350947741e+0 + b = 0.4577175367122110e+0 + v = 0.2081980387824712e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2025152599210369e+0 + b = 0.2520253617719557e-1 + v = 0.1521318610377956e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2381066653274425e+0 + b = 0.5223254506119000e-1 + v = 0.1622772720185755e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2732823383651612e+0 + b = 0.8060669688588620e-1 + v = 0.1710498139420709e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3080137692611118e+0 + b = 0.1099335754081255e+0 + v = 0.1785911149448736e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3422405614587601e+0 + b = 0.1399120955959857e+0 + v = 0.1850125313687736e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3758808773890420e+0 + b = 0.1702977801651705e+0 + v = 0.1904229703933298e-3 + grids.append(SphGenOh(5, a, b, v)) + a 
= 0.4088458383438932e+0 + b = 0.2008799256601680e+0 + v = 0.1949259956121987e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4410450550841152e+0 + b = 0.2314703052180836e+0 + v = 0.1986161545363960e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4723879420561312e+0 + b = 0.2618972111375892e+0 + v = 0.2015790585641370e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5027843561874343e+0 + b = 0.2920013195600270e+0 + v = 0.2038934198707418e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5321453674452458e+0 + b = 0.3216322555190551e+0 + v = 0.2056334060538251e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5603839113834030e+0 + b = 0.3506456615934198e+0 + v = 0.2068705959462289e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5874150706875146e+0 + b = 0.3789007181306267e+0 + v = 0.2076753906106002e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6131559381660038e+0 + b = 0.4062580170572782e+0 + v = 0.2081179391734803e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2778497016394506e+0 + b = 0.2696271276876226e-1 + v = 0.1700345216228943e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3143733562261912e+0 + b = 0.5523469316960465e-1 + v = 0.1774906779990410e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3501485810261827e+0 + b = 0.8445193201626464e-1 + v = 0.1839659377002642e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3851430322303653e+0 + b = 0.1143263119336083e+0 + v = 0.1894987462975169e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4193013979470415e+0 + b = 0.1446177898344475e+0 + v = 0.1941548809452595e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4525585960458567e+0 + b = 0.1751165438438091e+0 + v = 0.1980078427252384e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4848447779622947e+0 + b = 0.2056338306745660e+0 + v = 0.2011296284744488e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5160871208276894e+0 + b = 0.2359965487229226e+0 + v = 0.2035888456966776e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5462112185696926e+0 + b = 0.2660430223139146e+0 + v = 
0.2054516325352142e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5751425068101757e+0 + b = 0.2956193664498032e+0 + v = 0.2067831033092635e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6028073872853596e+0 + b = 0.3245763905312779e+0 + v = 0.2076485320284876e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6291338275278409e+0 + b = 0.3527670026206972e+0 + v = 0.2081141439525255e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3541797528439391e+0 + b = 0.2823853479435550e-1 + v = 0.1834383015469222e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3908234972074657e+0 + b = 0.5741296374713106e-1 + v = 0.1889540591777677e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4264408450107590e+0 + b = 0.8724646633650199e-1 + v = 0.1936677023597375e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4609949666553286e+0 + b = 0.1175034422915616e+0 + v = 0.1976176495066504e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4944389496536006e+0 + b = 0.1479755652628428e+0 + v = 0.2008536004560983e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5267194884346086e+0 + b = 0.1784740659484352e+0 + v = 0.2034280351712291e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5577787810220990e+0 + b = 0.2088245700431244e+0 + v = 0.2053944466027758e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5875563763536670e+0 + b = 0.2388628136570763e+0 + v = 0.2068077642882360e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6159910016391269e+0 + b = 0.2684308928769185e+0 + v = 0.2077250949661599e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6430219602956268e+0 + b = 0.2973740761960252e+0 + v = 0.2082062440705320e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4300647036213646e+0 + b = 0.2916399920493977e-1 + v = 0.1934374486546626e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4661486308935531e+0 + b = 0.5898803024755659e-1 + v = 0.1974107010484300e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5009658555287261e+0 + b = 0.8924162698525409e-1 + v = 0.2007129290388658e-3 + grids.append(SphGenOh(5, a, b, v)) + a 
= 0.5344824270447704e+0 + b = 0.1197185199637321e+0 + v = 0.2033736947471293e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5666575997416371e+0 + b = 0.1502300756161382e+0 + v = 0.2054287125902493e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5974457471404752e+0 + b = 0.1806004191913564e+0 + v = 0.2069184936818894e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6267984444116886e+0 + b = 0.2106621764786252e+0 + v = 0.2078883689808782e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6546664713575417e+0 + b = 0.2402526932671914e+0 + v = 0.2083886366116359e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5042711004437253e+0 + b = 0.2982529203607657e-1 + v = 0.2006593275470817e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5392127456774380e+0 + b = 0.6008728062339922e-1 + v = 0.2033728426135397e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5726819437668618e+0 + b = 0.9058227674571398e-1 + v = 0.2055008781377608e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6046469254207278e+0 + b = 0.1211219235803400e+0 + v = 0.2070651783518502e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6350716157434952e+0 + b = 0.1515286404791580e+0 + v = 0.2080953335094320e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6639177679185454e+0 + b = 0.1816314681255552e+0 + v = 0.2086284998988521e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5757276040972253e+0 + b = 0.3026991752575440e-1 + v = 0.2055549387644668e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6090265823139755e+0 + b = 0.6078402297870770e-1 + v = 0.2071871850267654e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6406735344387661e+0 + b = 0.9135459984176636e-1 + v = 0.2082856600431965e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6706397927793709e+0 + b = 0.1218024155966590e+0 + v = 0.2088705858819358e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6435019674426665e+0 + b = 0.3052608357660639e-1 + v = 0.2083995867536322e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6747218676375681e+0 + b = 0.6112185773983089e-1 + v = 
0.2090509712889637e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + + +def MakeAngularGrid_5810(): + grids = [] + a = 0 + b = 0 + v = 0.9735347946175486e-5 + grids.append(SphGenOh(0, a, b, v)) + v = 0.1907581241803167e-3 + grids.append(SphGenOh(1, a, b, v)) + v = 0.1901059546737578e-3 + grids.append(SphGenOh(2, a, b, v)) + a = 0.1182361662400277e-1 + v = 0.3926424538919212e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3062145009138958e-1 + v = 0.6667905467294382e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5329794036834243e-1 + v = 0.8868891315019135e-4 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7848165532862220e-1 + v = 0.1066306000958872e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1054038157636201e+0 + v = 0.1214506743336128e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1335577797766211e+0 + v = 0.1338054681640871e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1625769955502252e+0 + v = 0.1441677023628504e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.1921787193412792e+0 + v = 0.1528880200826557e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2221340534690548e+0 + v = 0.1602330623773609e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2522504912791132e+0 + v = 0.1664102653445244e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.2823610860679697e+0 + v = 0.1715845854011323e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3123173966267560e+0 + v = 0.1758901000133069e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3419847036953789e+0 + v = 0.1794382485256736e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3712386456999758e+0 + v = 0.1823238106757407e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3999627649876828e+0 + v = 0.1846293252959976e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4280466458648093e+0 + v = 0.1864284079323098e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4553844360185711e+0 + v = 0.1877882694626914e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.4818736094437834e+0 + v = 0.1887716321852025e-3 + 
grids.append(SphGenOh(3, a, b, v)) + a = 0.5074138709260629e+0 + v = 0.1894381638175673e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5319061304570707e+0 + v = 0.1898454899533629e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5552514978677286e+0 + v = 0.1900497929577815e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.5981009025246183e+0 + v = 0.1900671501924092e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6173990192228116e+0 + v = 0.1899837555533510e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6351365239411131e+0 + v = 0.1899014113156229e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6512010228227200e+0 + v = 0.1898581257705106e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6654758363948120e+0 + v = 0.1898804756095753e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6778410414853370e+0 + v = 0.1899793610426402e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6881760887484110e+0 + v = 0.1901464554844117e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.6963645267094598e+0 + v = 0.1903533246259542e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7023010617153579e+0 + v = 0.1905556158463228e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.7059004636628753e+0 + v = 0.1907037155663528e-3 + grids.append(SphGenOh(3, a, b, v)) + a = 0.3552470312472575e-1 + v = 0.5992997844249967e-4 + grids.append(SphGenOh(4, a, b, v)) + a = 0.9151176620841283e-1 + v = 0.9749059382456978e-4 + grids.append(SphGenOh(4, a, b, v)) + a = 0.1566197930068980e+0 + v = 0.1241680804599158e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2265467599271907e+0 + v = 0.1437626154299360e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.2988242318581361e+0 + v = 0.1584200054793902e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.3717482419703886e+0 + v = 0.1694436550982744e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.4440094491758889e+0 + v = 0.1776617014018108e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.5145337096756642e+0 + v = 0.1836132434440077e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 
0.5824053672860230e+0 + v = 0.1876494727075983e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6468283961043370e+0 + v = 0.1899906535336482e-3 + grids.append(SphGenOh(4, a, b, v)) + a = 0.6095964259104373e-1 + b = 0.1787828275342931e-1 + v = 0.8143252820767350e-4 + grids.append(SphGenOh(5, a, b, v)) + a = 0.8811962270959388e-1 + b = 0.3953888740792096e-1 + v = 0.9998859890887728e-4 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1165936722428831e+0 + b = 0.6378121797722990e-1 + v = 0.1156199403068359e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1460232857031785e+0 + b = 0.8985890813745037e-1 + v = 0.1287632092635513e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1761197110181755e+0 + b = 0.1172606510576162e+0 + v = 0.1398378643365139e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2066471190463718e+0 + b = 0.1456102876970995e+0 + v = 0.1491876468417391e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2374076026328152e+0 + b = 0.1746153823011775e+0 + v = 0.1570855679175456e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2682305474337051e+0 + b = 0.2040383070295584e+0 + v = 0.1637483948103775e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2989653312142369e+0 + b = 0.2336788634003698e+0 + v = 0.1693500566632843e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3294762752772209e+0 + b = 0.2633632752654219e+0 + v = 0.1740322769393633e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3596390887276086e+0 + b = 0.2929369098051601e+0 + v = 0.1779126637278296e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3893383046398812e+0 + b = 0.3222592785275512e+0 + v = 0.1810908108835412e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4184653789358347e+0 + b = 0.3512004791195743e+0 + v = 0.1836529132600190e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4469172319076166e+0 + b = 0.3796385677684537e+0 + v = 0.1856752841777379e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4745950813276976e+0 + b = 0.4074575378263879e+0 + v = 0.1872270566606832e-3 + grids.append(SphGenOh(5, a, b, v)) + a 
= 0.5014034601410262e+0 + b = 0.4345456906027828e+0 + v = 0.1883722645591307e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5272493404551239e+0 + b = 0.4607942515205134e+0 + v = 0.1891714324525297e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5520413051846366e+0 + b = 0.4860961284181720e+0 + v = 0.1896827480450146e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5756887237503077e+0 + b = 0.5103447395342790e+0 + v = 0.1899628417059528e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1225039430588352e+0 + b = 0.2136455922655793e-1 + v = 0.1123301829001669e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1539113217321372e+0 + b = 0.4520926166137188e-1 + v = 0.1253698826711277e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1856213098637712e+0 + b = 0.7086468177864818e-1 + v = 0.1366266117678531e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2174998728035131e+0 + b = 0.9785239488772918e-1 + v = 0.1462736856106918e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2494128336938330e+0 + b = 0.1258106396267210e+0 + v = 0.1545076466685412e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2812321562143480e+0 + b = 0.1544529125047001e+0 + v = 0.1615096280814007e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3128372276456111e+0 + b = 0.1835433512202753e+0 + v = 0.1674366639741759e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3441145160177973e+0 + b = 0.2128813258619585e+0 + v = 0.1724225002437900e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3749567714853510e+0 + b = 0.2422913734880829e+0 + v = 0.1765810822987288e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4052621732015610e+0 + b = 0.2716163748391453e+0 + v = 0.1800104126010751e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4349335453522385e+0 + b = 0.3007127671240280e+0 + v = 0.1827960437331284e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4638776641524965e+0 + b = 0.3294470677216479e+0 + v = 0.1850140300716308e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4920046410462687e+0 + b = 0.3576932543699155e+0 + v = 
0.1867333507394938e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5192273554861704e+0 + b = 0.3853307059757764e+0 + v = 0.1880178688638289e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5454609081136522e+0 + b = 0.4122425044452694e+0 + v = 0.1889278925654758e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5706220661424140e+0 + b = 0.4383139587781027e+0 + v = 0.1895213832507346e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5946286755181518e+0 + b = 0.4634312536300553e+0 + v = 0.1898548277397420e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.1905370790924295e+0 + b = 0.2371311537781979e-1 + v = 0.1349105935937341e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2242518717748009e+0 + b = 0.4917878059254806e-1 + v = 0.1444060068369326e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2577190808025936e+0 + b = 0.7595498960495142e-1 + v = 0.1526797390930008e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2908724534927187e+0 + b = 0.1036991083191100e+0 + v = 0.1598208771406474e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3236354020056219e+0 + b = 0.1321348584450234e+0 + v = 0.1659354368615331e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3559267359304543e+0 + b = 0.1610316571314789e+0 + v = 0.1711279910946440e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3876637123676956e+0 + b = 0.1901912080395707e+0 + v = 0.1754952725601440e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4187636705218842e+0 + b = 0.2194384950137950e+0 + v = 0.1791247850802529e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4491449019883107e+0 + b = 0.2486155334763858e+0 + v = 0.1820954300877716e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4787270932425445e+0 + b = 0.2775768931812335e+0 + v = 0.1844788524548449e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5074315153055574e+0 + b = 0.3061863786591120e+0 + v = 0.1863409481706220e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5351810507738336e+0 + b = 0.3343144718152556e+0 + v = 0.1877433008795068e-3 + grids.append(SphGenOh(5, a, b, v)) + a 
= 0.5619001025975381e+0 + b = 0.3618362729028427e+0 + v = 0.1887444543705232e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5875144035268046e+0 + b = 0.3886297583620408e+0 + v = 0.1894009829375006e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6119507308734495e+0 + b = 0.4145742277792031e+0 + v = 0.1897683345035198e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2619733870119463e+0 + b = 0.2540047186389353e-1 + v = 0.1517327037467653e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.2968149743237949e+0 + b = 0.5208107018543989e-1 + v = 0.1587740557483543e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3310451504860488e+0 + b = 0.7971828470885599e-1 + v = 0.1649093382274097e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3646215567376676e+0 + b = 0.1080465999177927e+0 + v = 0.1701915216193265e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3974916785279360e+0 + b = 0.1368413849366629e+0 + v = 0.1746847753144065e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4295967403772029e+0 + b = 0.1659073184763559e+0 + v = 0.1784555512007570e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4608742854473447e+0 + b = 0.1950703730454614e+0 + v = 0.1815687562112174e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4912598858949903e+0 + b = 0.2241721144376724e+0 + v = 0.1840864370663302e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5206882758945558e+0 + b = 0.2530655255406489e+0 + v = 0.1860676785390006e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5490940914019819e+0 + b = 0.2816118409731066e+0 + v = 0.1875690583743703e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5764123302025542e+0 + b = 0.3096780504593238e+0 + v = 0.1886453236347225e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6025786004213506e+0 + b = 0.3371348366394987e+0 + v = 0.1893501123329645e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6275291964794956e+0 + b = 0.3638547827694396e+0 + v = 0.1897366184519868e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3348189479861771e+0 + b = 0.2664841935537443e-1 + v = 
0.1643908815152736e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.3699515545855295e+0 + b = 0.5424000066843495e-1 + v = 0.1696300350907768e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4042003071474669e+0 + b = 0.8251992715430854e-1 + v = 0.1741553103844483e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4375320100182624e+0 + b = 0.1112695182483710e+0 + v = 0.1780015282386092e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4699054490335947e+0 + b = 0.1402964116467816e+0 + v = 0.1812116787077125e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5012739879431952e+0 + b = 0.1694275117584291e+0 + v = 0.1838323158085421e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5315874883754966e+0 + b = 0.1985038235312689e+0 + v = 0.1859113119837737e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5607937109622117e+0 + b = 0.2273765660020893e+0 + v = 0.1874969220221698e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5888393223495521e+0 + b = 0.2559041492849764e+0 + v = 0.1886375612681076e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6156705979160163e+0 + b = 0.2839497251976899e+0 + v = 0.1893819575809276e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6412338809078123e+0 + b = 0.3113791060500690e+0 + v = 0.1897794748256767e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4076051259257167e+0 + b = 0.2757792290858463e-1 + v = 0.1738963926584846e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4423788125791520e+0 + b = 0.5584136834984293e-1 + v = 0.1777442359873466e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4760480917328258e+0 + b = 0.8457772087727143e-1 + v = 0.1810010815068719e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5085838725946297e+0 + b = 0.1135975846359248e+0 + v = 0.1836920318248129e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5399513637391218e+0 + b = 0.1427286904765053e+0 + v = 0.1858489473214328e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5701118433636380e+0 + b = 0.1718112740057635e+0 + v = 0.1875079342496592e-3 + grids.append(SphGenOh(5, a, b, v)) + a 
= 0.5990240530606021e+0 + b = 0.2006944855985351e+0 + v = 0.1887080239102310e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6266452685139695e+0 + b = 0.2292335090598907e+0 + v = 0.1894905752176822e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6529320971415942e+0 + b = 0.2572871512353714e+0 + v = 0.1898991061200695e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.4791583834610126e+0 + b = 0.2826094197735932e-1 + v = 0.1809065016458791e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5130373952796940e+0 + b = 0.5699871359683649e-1 + v = 0.1836297121596799e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5456252429628476e+0 + b = 0.8602712528554394e-1 + v = 0.1858426916241869e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5768956329682385e+0 + b = 0.1151748137221281e+0 + v = 0.1875654101134641e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6068186944699046e+0 + b = 0.1442811654136362e+0 + v = 0.1888240751833503e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6353622248024907e+0 + b = 0.1731930321657680e+0 + v = 0.1896497383866979e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6624927035731797e+0 + b = 0.2017619958756061e+0 + v = 0.1900775530219121e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5484933508028488e+0 + b = 0.2874219755907391e-1 + v = 0.1858525041478814e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.5810207682142106e+0 + b = 0.5778312123713695e-1 + v = 0.1876248690077947e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6120955197181352e+0 + b = 0.8695262371439526e-1 + v = 0.1889404439064607e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6416944284294319e+0 + b = 0.1160893767057166e+0 + v = 0.1898168539265290e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6697926391731260e+0 + b = 0.1450378826743251e+0 + v = 0.1902779940661772e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6147594390585488e+0 + b = 0.2904957622341456e-1 + v = 0.1890125641731815e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6455390026356783e+0 + b = 0.5823809152617197e-1 + v = 
0.1899434637795751e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6747258588365477e+0 + b = 0.8740384899884715e-1 + v = 0.1904520856831751e-3 + grids.append(SphGenOh(5, a, b, v)) + a = 0.6772135750395347e+0 + b = 0.2919946135808105e-1 + v = 0.1905534498734563e-3 + grids.append(SphGenOh(5, a, b, v)) + return np.vstack(grids) + +# ~= (L+1)**2/3 +LEBEDEV_ORDER = { + 0 : 1 , + 3 : 6 , + 5 : 14 , + 7 : 26 , + 9 : 38 , + 11 : 50 , + 13 : 74 , + 15 : 86 , + 17 : 110 , + 19 : 146 , + 21 : 170 , + 23 : 194 , + 25 : 230 , + 27 : 266 , + 29 : 302 , + 31 : 350 , + 35 : 434 , + 41 : 590 , + 47 : 770 , + 53 : 974 , + 59 : 1202, + 65 : 1454, + 71 : 1730, + 77 : 2030, + 83 : 2354, + 89 : 2702, + 95 : 3074, + 101: 3470, + 107: 3890, + 113: 4334, + 119: 4802, + 125: 5294, + 131: 5810 +} +LEBEDEV_NGRID = np.array(list(LEBEDEV_ORDER.values())) + +@lru_cache(maxsize=50) +def MakeAngularGrid(points): + '''Angular grids for specified Lebedev points''' + if points in (0, 1): + return np.array((0., 0., 0., 1.)) + + if points not in LEBEDEV_NGRID: + raise ValueError('Unsupported angular grids %d' % points) + + fn = globals()['MakeAngularGrid_' + str(points)] + grids = fn() + return grids diff --git a/pyscf/dft/gen_grid.py b/pyscf/dft/gen_grid.py index eea7a69775..77c1c781fd 100644 --- a/pyscf/dft/gen_grid.py +++ b/pyscf/dft/gen_grid.py @@ -31,6 +31,7 @@ from pyscf import lib from pyscf.lib import logger from pyscf.dft import radi +from pyscf.dft.LebedevGrid import LEBEDEV_ORDER, LEBEDEV_NGRID, MakeAngularGrid from pyscf import gto from pyscf.gto.eval_gto import BLKSIZE, NBINS, CUTOFF, make_screen_index from pyscf import __config__ @@ -43,44 +44,6 @@ ALIGNMENT_UNIT = 8 NELEC_ERROR_TOL = getattr(__config__, 'dft_rks_prune_error_tol', 0.02) -# ~= (L+1)**2/3 -LEBEDEV_ORDER = { - 0 : 1 , - 3 : 6 , - 5 : 14 , - 7 : 26 , - 9 : 38 , - 11 : 50 , - 13 : 74 , - 15 : 86 , - 17 : 110 , - 19 : 146 , - 21 : 170 , - 23 : 194 , - 25 : 230 , - 27 : 266 , - 29 : 302 , - 31 : 350 , - 35 : 434 , - 41 : 590 , - 
47 : 770 , - 53 : 974 , - 59 : 1202, - 65 : 1454, - 71 : 1730, - 77 : 2030, - 83 : 2354, - 89 : 2702, - 95 : 3074, - 101: 3470, - 107: 3890, - 113: 4334, - 119: 4802, - 125: 5294, - 131: 5810 -} -LEBEDEV_NGRID = numpy.array(list(LEBEDEV_ORDER.values())) - # SG0 # S. Chien and P. Gill, J. Comput. Chem. 27 (2006) 730-739. @@ -265,9 +228,7 @@ def gen_atomic_grids(mol, atom_grid={}, radi_method=radi.gauss_chebyshev, coords = [] vol = [] for n in sorted(set(angs)): - grid = numpy.empty((n,4)) - libdft.MakeAngularGrid(grid.ctypes.data_as(ctypes.c_void_p), - ctypes.c_int(n)) + grid = MakeAngularGrid(n) idx = numpy.where(angs==n)[0] #coords.append(numpy.einsum('i,jk->jik', rad[idx], grid[:,:3]).reshape(-1,3)) #vol.append(numpy.einsum('i,j->ji', rad_weight[idx], grid[:,3]).ravel()) diff --git a/pyscf/solvent/ddcosmo.py b/pyscf/solvent/ddcosmo.py index 80a4a6d8de..e0293aae8f 100644 --- a/pyscf/solvent/ddcosmo.py +++ b/pyscf/solvent/ddcosmo.py @@ -340,9 +340,7 @@ def regularize_xt(t, eta): def make_grids_one_sphere(lebedev_order): ngrid_1sph = gen_grid.LEBEDEV_ORDER[lebedev_order] - leb_grid = numpy.empty((ngrid_1sph,4)) - gen_grid.libdft.MakeAngularGrid(leb_grid.ctypes.data_as(ctypes.c_void_p), - ctypes.c_int(ngrid_1sph)) + leb_grid = gen_grid.MakeAngularGrid(ngrid_1sph) coords_1sph = leb_grid[:,:3] # Note the Lebedev angular grids are normalized to 1 in pyscf weights_1sph = 4*numpy.pi * leb_grid[:,3] diff --git a/pyscf/solvent/pcm.py b/pyscf/solvent/pcm.py index fc6412d292..75b777ad4b 100644 --- a/pyscf/solvent/pcm.py +++ b/pyscf/solvent/pcm.py @@ -139,9 +139,7 @@ def switch_h(x): def gen_surface(mol, ng=302, vdw_scale=1.2): '''J. Phys. Chem. 
A 1999, 103, 11060-11079''' - unit_sphere = numpy.empty((ng,4)) - libdft.MakeAngularGrid(unit_sphere.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(ng)) - + unit_sphere = gen_grid.MakeAngularGrid(ng) atom_coords = mol.atom_coords(unit='B') charges = mol.atom_charges() N_J = ng * numpy.ones(mol.natm) From 881dbbebcbc084ba2ba8fe968ff03fd34d7dfdf8 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Sun, 11 Feb 2024 11:44:57 -0800 Subject: [PATCH 14/44] Add an API to dump system info --- pyscf/gto/mole.py | 19 +++---------------- pyscf/lib/misc.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/pyscf/gto/mole.py b/pyscf/gto/mole.py index b79074aa56..4e06980ffb 100644 --- a/pyscf/gto/mole.py +++ b/pyscf/gto/mole.py @@ -25,9 +25,7 @@ import sys import types import re -import platform import gc -import time import json import ctypes @@ -2695,7 +2693,6 @@ def gto_norm(self, l, expnt): def dump_input(self): import __main__ - import pyscf if hasattr(__main__, '__file__'): try: filename = os.path.abspath(__main__.__file__) @@ -2709,19 +2706,9 @@ def dump_input(self): except IOError: logger.warn(self, 'input file does not exist') - self.stdout.write('System: %s Threads %s\n' % - (str(platform.uname()), lib.num_threads())) - self.stdout.write('Python %s\n' % sys.version) - self.stdout.write('numpy %s scipy %s\n' % - (numpy.__version__, scipy.__version__)) - self.stdout.write('Date: %s\n' % time.ctime()) - self.stdout.write('PySCF version %s\n' % pyscf.__version__) - info = lib.repo_info(os.path.join(__file__, '..', '..')) - self.stdout.write('PySCF path %s\n' % info['path']) - if 'git' in info: - self.stdout.write(info['git'] + '\n') - - self.stdout.write('\n') + self.stdout.write('\n'.join(lib.misc.format_sys_info())) + + self.stdout.write('\n\n') for key in os.environ: if 'PYSCF' in key: self.stdout.write('[ENV] %s %s\n' % (key, os.environ[key])) diff --git a/pyscf/lib/misc.py b/pyscf/lib/misc.py index 1b109208dc..46ebf5edf0 100644 --- 
a/pyscf/lib/misc.py +++ b/pyscf/lib/misc.py @@ -22,6 +22,8 @@ import os import sys +import time +import platform import warnings import tempfile import functools @@ -30,6 +32,7 @@ import collections import ctypes import numpy +import scipy import h5py from threading import Thread from multiprocessing import Queue, Process @@ -1303,6 +1306,22 @@ def git_info(repo_path): pass return orig_head, head, branch +def format_sys_info(): + '''Format a list of system information for printing.''' + import pyscf + info = repo_info(os.path.join(__file__, '..', '..')) + result = [ + f'System: {platform.uname()} Threads {num_threads()}', + f'Python {sys.version}', + f'numpy {numpy.__version__} scipy {scipy.__version__}', + f'Date: {time.ctime()}', + f'PySCF version {pyscf.__version__}', + f'PySCF path {info["path"]}', + ] + if 'git' in info: + result.append(info['git']) + return result + def isinteger(obj): ''' From 174e23c6b612c90fc2a54d1166042be7b7f72058 Mon Sep 17 00:00:00 2001 From: Xing Zhang Date: Wed, 21 Feb 2024 22:30:31 -0800 Subject: [PATCH 15/44] multigrid DFT version 2 (#2078) --- examples/pbc/27-multigrid.py | 4 +- examples/pbc/27-multigrid2.py | 238 ++++ pyscf/gto/mole.py | 22 +- pyscf/gto/moleintor.py | 1 + pyscf/lib/CMakeLists.txt | 40 + pyscf/lib/dft/CMakeLists.txt | 14 +- pyscf/lib/dft/grid_collocate.c | 655 +++++++++ pyscf/lib/dft/grid_common.c | 660 +++++++++ pyscf/lib/dft/grid_common.h | 109 ++ pyscf/lib/dft/grid_integrate.c | 1358 +++++++++++++++++++ pyscf/lib/dft/libxc_itrf.c | 290 +++- pyscf/lib/dft/multigrid.c | 744 +++++++++++ pyscf/lib/dft/multigrid.h | 72 + pyscf/lib/dft/utils.c | 62 + pyscf/lib/dft/utils.h | 27 + pyscf/lib/np_helper/np_helper.h | 7 + pyscf/lib/numpy_helper.py | 10 + pyscf/lib/pbc/CMakeLists.txt | 12 +- pyscf/lib/pbc/cell.c | 280 ++++ pyscf/lib/pbc/cell.h | 29 + pyscf/lib/pbc/fft.c | 147 ++ pyscf/lib/pbc/fft.h | 26 + pyscf/lib/pbc/fill_ints.c | 6 +- pyscf/lib/pbc/fill_ints.h | 29 + pyscf/lib/pbc/fill_ints_screened.c | 1012 
++++++++++++++ pyscf/lib/pbc/hf_grad.c | 95 ++ pyscf/lib/pbc/neighbor_list.c | 206 +++ pyscf/lib/pbc/neighbor_list.h | 41 + pyscf/lib/pbc/optimizer.c | 38 +- pyscf/lib/pbc/optimizer.h | 6 +- pyscf/lib/pbc/pp.c | 448 +++++++ pyscf/lib/test/test_numint_uniform_grid.py | 6 +- pyscf/pbc/df/incore.py | 244 ++++ pyscf/pbc/dft/gks.py | 2 +- pyscf/pbc/dft/kgks.py | 2 +- pyscf/pbc/dft/krks.py | 6 +- pyscf/pbc/dft/krks_ksymm.py | 6 +- pyscf/pbc/dft/kuks.py | 4 +- pyscf/pbc/dft/kuks_ksymm.py | 6 +- pyscf/pbc/dft/multigrid/__init__.py | 57 + pyscf/pbc/dft/{ => multigrid}/multigrid.py | 179 ++- pyscf/pbc/dft/multigrid/multigrid_pair.py | 1405 ++++++++++++++++++++ pyscf/pbc/dft/multigrid/pp.py | 290 ++++ pyscf/pbc/dft/multigrid/utils.py | 70 + pyscf/pbc/dft/rks.py | 6 +- pyscf/pbc/dft/test/test_krks_ksym.py | 8 +- pyscf/pbc/dft/test/test_multigrid.py | 52 +- pyscf/pbc/dft/test/test_multigrid2.py | 95 ++ pyscf/pbc/dft/uks.py | 4 +- pyscf/pbc/grad/__init__.py | 7 +- pyscf/pbc/grad/krhf.py | 8 +- pyscf/pbc/grad/rhf.py | 167 +++ pyscf/pbc/grad/rks.py | 24 + pyscf/pbc/grad/uhf.py | 92 ++ pyscf/pbc/grad/uks.py | 24 + pyscf/pbc/gto/__init__.py | 1 + pyscf/pbc/gto/_pbcintor.py | 21 +- pyscf/pbc/gto/cell.py | 223 +++- pyscf/pbc/gto/ewald_methods.py | 293 ++++ pyscf/pbc/gto/neighborlist.py | 199 +++ pyscf/pbc/gto/pseudo/pp_int.py | 367 ++++- pyscf/pbc/gto/pseudo/test/test_pp.py | 36 + pyscf/pbc/gto/test/test_cell.py | 25 + pyscf/pbc/scf/hf.py | 46 +- pyscf/pbc/scf/khf.py | 12 +- pyscf/pbc/scf/khf_ksymm.py | 6 +- pyscf/pbc/scf/kuhf.py | 6 +- pyscf/pbc/scf/kuhf_ksymm.py | 6 +- pyscf/pbc/scf/test/test_hf.py | 26 +- pyscf/pbc/scf/uhf.py | 9 +- pyscf/pbc/symm/geom.py | 2 +- pyscf/pbc/symm/pyscf_spglib.py | 2 +- pyscf/pbc/symm/symmetry.py | 2 +- pyscf/pbc/tools/pbc.py | 58 +- pyscf/scf/atom_hf.py | 15 +- pyscf/scf/atom_hf_pp.py | 154 +++ pyscf/scf/dhf.py | 4 +- pyscf/scf/diis.py | 8 +- pyscf/scf/hf.py | 34 +- pyscf/scf/uhf.py | 8 +- 80 files changed, 10713 insertions(+), 302 deletions(-) create 
mode 100644 examples/pbc/27-multigrid2.py create mode 100644 pyscf/lib/dft/grid_collocate.c create mode 100644 pyscf/lib/dft/grid_common.c create mode 100644 pyscf/lib/dft/grid_common.h create mode 100644 pyscf/lib/dft/grid_integrate.c create mode 100644 pyscf/lib/dft/multigrid.c create mode 100644 pyscf/lib/dft/multigrid.h create mode 100644 pyscf/lib/dft/utils.c create mode 100644 pyscf/lib/dft/utils.h create mode 100644 pyscf/lib/pbc/cell.c create mode 100644 pyscf/lib/pbc/cell.h create mode 100644 pyscf/lib/pbc/fft.c create mode 100644 pyscf/lib/pbc/fft.h create mode 100644 pyscf/lib/pbc/fill_ints.h create mode 100644 pyscf/lib/pbc/fill_ints_screened.c create mode 100644 pyscf/lib/pbc/hf_grad.c create mode 100644 pyscf/lib/pbc/neighbor_list.c create mode 100644 pyscf/lib/pbc/neighbor_list.h create mode 100644 pyscf/lib/pbc/pp.c create mode 100644 pyscf/pbc/dft/multigrid/__init__.py rename pyscf/pbc/dft/{ => multigrid}/multigrid.py (95%) create mode 100644 pyscf/pbc/dft/multigrid/multigrid_pair.py create mode 100644 pyscf/pbc/dft/multigrid/pp.py create mode 100644 pyscf/pbc/dft/multigrid/utils.py create mode 100644 pyscf/pbc/dft/test/test_multigrid2.py create mode 100644 pyscf/pbc/grad/rhf.py create mode 100644 pyscf/pbc/grad/rks.py create mode 100644 pyscf/pbc/grad/uhf.py create mode 100644 pyscf/pbc/grad/uks.py create mode 100644 pyscf/pbc/gto/ewald_methods.py create mode 100644 pyscf/pbc/gto/neighborlist.py create mode 100644 pyscf/scf/atom_hf_pp.py diff --git a/examples/pbc/27-multigrid.py b/examples/pbc/27-multigrid.py index f1b1f85a95..6809f33e3d 100644 --- a/examples/pbc/27-multigrid.py +++ b/examples/pbc/27-multigrid.py @@ -31,9 +31,9 @@ # # There are two ways to enable multigrid numerical integration # -# Method 1: use multigrid.multigrid function to update SCF object +# Method 1: use multigrid.multigrid_fftdf function to update SCF object # -mf = multigrid.multigrid(mf) +mf = multigrid.multigrid_fftdf(mf) mf.kernel() # diff --git 
a/examples/pbc/27-multigrid2.py b/examples/pbc/27-multigrid2.py new file mode 100644 index 0000000000..d73cd8fe50 --- /dev/null +++ b/examples/pbc/27-multigrid2.py @@ -0,0 +1,238 @@ +#from os.path import expanduser +#home_dir = expanduser("~") +#f = open(home_dir+'/.pyscf_conf.py', 'a') +# use FFTW for fft, this requires to compile the FFTW library +# cmake -DENABLE_FFTW=ON -DBUILD_FFTW=ON +#f.write('pbc_tools_pbc_fft_engine=\'FFTW\'') +#f.close() + +import numpy +import pyscf +from pyscf import lib +from pyscf import pbc +from pyscf.pbc import gto as pbcgto +from pyscf.pbc import dft as pbcdft +from pyscf.pbc.dft import multigrid + +cell=pbcgto.Cell() + +#Molecule +boxlen=12.4138 +cell.a=numpy.array([[boxlen,0.0,0.0],[0.0,boxlen,0.0],[0.0,0.0,boxlen]]) +cell.atom=""" +O 12.235322 1.376642 10.869880 +O 6.445390 3.706940 8.650794 +O 0.085977 2.181322 8.276663 +O 12.052554 2.671366 2.147199 +O 12.250036 4.190930 12.092014 +O 7.187422 0.959062 4.733469 +O 8.346457 7.210040 4.667644 +O 12.361546 11.527875 8.106887 +O 3.299984 4.440816 9.193275 +O 2.855829 3.759909 6.552815 +O 1.392494 6.362753 0.586172 +O 1.858645 8.694013 2.068738 +O 3.770231 12.094519 8.652183 +O 6.432508 3.669828 2.772418 +O 1.998724 1.820217 4.876440 +O 8.248581 2.404730 6.931303 +O 5.753814 3.360029 12.461534 +O 11.322212 5.649239 2.236798 +O 4.277318 2.113956 10.590808 +O 5.405015 3.349247 5.484702 +O 6.493278 11.869958 0.684912 +O 3.275250 2.346576 2.425241 +O 7.981003 6.352512 7.507970 +O 5.985990 6.512854 12.194648 +O 10.636714 11.856872 12.209540 +O 9.312283 3.670384 3.508594 +O 1.106885 5.830301 6.638695 +O 8.008007 3.326363 10.869818 +O 12.403000 9.687405 11.761901 +O 4.219782 7.085315 8.153470 +O 3.781557 8.203821 11.563272 +O 11.088898 4.532081 7.809475 +O 10.387548 8.408890 1.017882 +O 1.979016 6.418091 10.374159 +O 4.660547 0.549666 5.617403 +O 8.745880 12.256257 8.089383 +O 2.662041 10.489890 0.092980 +O 7.241661 10.471815 4.226946 +O 2.276827 0.276647 10.810417 +O 8.887733 0.946877 
1.333885 +O 1.943554 8.088552 7.567650 +O 9.667942 8.056759 9.868847 +O 10.905491 8.339638 6.484782 +O 3.507733 4.862402 1.557439 +O 8.010457 8.642846 12.055969 +O 8.374446 10.035932 6.690309 +O 5.635247 6.076875 5.563993 +O 11.728434 1.601906 5.079475 +O 9.771134 9.814114 3.548703 +O 3.944355 10.563450 4.687536 +O 0.890357 6.382287 4.065806 +O 6.862447 6.425182 2.488202 +O 3.813963 6.595122 3.762649 +O 6.562448 8.295463 8.807182 +O 9.809455 0.143325 3.886553 +O 4.117074 11.661225 2.221679 +O 5.295317 8.735561 2.763183 +O 9.971999 5.379339 5.340378 +O 12.254708 8.643874 3.957116 +O 2.344274 10.761274 6.829162 +O 7.013416 0.643488 10.518797 +O 5.152349 10.233624 10.359388 +O 11.184278 5.884064 10.298279 +O 12.252335 8.974142 9.070831 +H 12.415139 2.233125 11.257611 +H 11.922476 1.573799 9.986994 +H 5.608192 3.371543 8.971482 +H 6.731226 3.060851 8.004962 +H -0.169205 1.565594 7.589645 +H -0.455440 2.954771 8.118939 +H 12.125168 2.826463 1.205443 +H 12.888828 2.969761 2.504745 +H 11.553255 4.386613 11.465566 +H 12.818281 4.960808 12.067151 +H 7.049495 1.772344 4.247898 +H 6.353019 0.798145 5.174047 +H 7.781850 7.384852 5.420566 +H 9.103203 6.754017 5.035898 +H 12.771232 11.788645 8.931744 +H 12.018035 10.650652 8.276334 +H 3.557245 3.792529 9.848846 +H 2.543844 4.884102 9.577958 +H 2.320235 4.521250 6.329813 +H 2.872128 3.749963 7.509824 +H 1.209685 7.121391 1.140501 +H 2.238885 6.038801 0.894245 +H 2.763109 8.856353 2.336735 +H 1.329379 9.047369 2.783755 +H 4.315639 11.533388 9.203449 +H 3.098742 12.433043 9.244412 +H 5.987369 3.448974 3.590530 +H 5.813096 3.419344 2.086985 +H 1.057126 1.675344 4.969379 +H 2.248496 2.292119 5.670892 +H 8.508264 1.653337 7.464411 +H 8.066015 2.034597 6.067646 +H 5.197835 2.915542 11.821572 +H 6.630900 3.329981 12.079371 +H 10.788986 6.436672 2.127933 +H 11.657923 5.463602 1.359832 +H 3.544476 1.634958 10.977765 +H 4.755770 1.455054 10.087655 +H 4.465371 3.375459 5.665294 +H 5.682663 4.264430 5.524498 +H 6.174815 11.778676 1.582954 +H 
5.713640 12.089924 0.174999 +H 3.476076 1.498708 2.028983 +H 2.730229 2.134295 3.182949 +H 7.119624 5.936450 7.474030 +H 8.536492 5.799405 6.958665 +H 5.909499 5.717477 11.667621 +H 6.125402 6.196758 13.087330 +H 11.203499 12.513536 11.804844 +H 10.260930 12.300153 12.970145 +H 9.985036 3.927685 2.878172 +H 8.545584 3.468329 2.972331 +H 1.399882 6.620092 7.093246 +H 0.963561 6.112523 5.735345 +H 8.067363 3.674002 9.979955 +H 8.000737 2.375959 10.756190 +H 11.821629 10.402510 12.020482 +H 12.206854 8.983242 12.379892 +H 3.461473 7.606485 7.889688 +H 3.844478 6.304711 8.560946 +H 3.179884 7.585614 11.148494 +H 4.401957 7.652030 12.039573 +H 11.573777 5.053211 7.169515 +H 10.342076 4.186083 7.320831 +H 10.065640 8.919194 1.760981 +H 9.629585 8.322499 0.439729 +H 1.396302 6.546079 9.625630 +H 1.405516 6.479759 11.138049 +H 4.024008 1.232518 5.405828 +H 4.736858 0.579881 6.571077 +H 9.452293 12.313381 8.732772 +H 8.976559 11.502788 7.545965 +H 1.834701 10.012311 0.153462 +H 3.295197 9.836403 -0.204175 +H 7.056724 11.401702 4.095264 +H 6.499038 10.020287 3.825865 +H 1.365541 0.487338 11.013887 +H 2.501591 -0.428131 11.417871 +H 8.644279 1.812362 1.005409 +H 8.142674 0.388030 1.112955 +H 1.272659 8.365063 8.191888 +H 2.142485 8.877768 7.063867 +H 8.961493 7.826192 9.265523 +H 9.227102 8.487654 10.601118 +H 10.150144 7.758934 6.392768 +H 10.596082 9.187988 6.167290 +H 3.463106 4.096188 2.129414 +H 3.919461 4.539801 0.755791 +H 7.418998 9.394959 12.028876 +H 7.430413 7.883095 12.106546 +H 7.972905 10.220334 5.841196 +H 7.675111 9.631498 7.203725 +H 5.332446 6.381336 6.419473 +H 5.000025 6.434186 4.943466 +H 11.575078 2.271167 4.412540 +H 11.219802 0.847030 4.783357 +H 8.865342 9.721516 3.843998 +H 10.000732 10.719285 3.758898 +H 3.186196 10.476397 5.265333 +H 4.407331 11.335128 5.013723 +H 0.558187 7.255936 3.859331 +H 0.341672 5.789383 3.552346 +H 7.459933 6.526049 3.229193 +H 6.696228 5.483739 2.440372 +H 3.864872 6.313007 2.849385 +H 2.876419 6.621201 3.953862 +H 
5.631529 8.079145 8.753997 +H 7.003296 7.568245 8.367822 +H 9.615413 0.527902 3.031755 +H 8.962985 0.109366 4.332162 +H 3.825854 11.139182 1.474087 +H 4.063988 11.063232 2.967211 +H 5.784391 7.914558 2.708486 +H 4.780461 8.655167 3.566110 +H 10.880659 5.444664 5.046607 +H 9.593331 4.687991 4.797350 +H 11.562317 8.960134 3.376765 +H 11.926084 8.816948 4.839320 +H 2.856874 11.297981 7.433660 +H 1.492332 11.195517 6.786033 +H 7.145820 0.090200 9.749009 +H 7.227275 0.077690 11.260665 +H 4.662021 9.538430 10.798155 +H 5.994537 9.833472 10.142985 +H 10.544299 6.595857 10.301445 +H 11.281750 5.653082 9.374494 +H 12.103020 8.841164 10.006916 +H 11.491592 8.576221 8.647557 +""" +cell.basis = 'gth-tzv2p' +cell.ke_cutoff = 200 # kinetic energy cutoff in a.u. +cell.max_memory = 8000 # in MB +cell.precision = 1e-6 # integral precision +cell.pseudo = 'gth-pade' +cell.verbose = 4 +cell.use_loose_rcut = True # integral screening based on shell radii +cell.use_particle_mesh_ewald = True # use particle mesh ewald for nuclear repulsion +cell.build() +#cell = pbc.tools.super_cell(cell, [1,2,2]) #build super cell by replicating unit cell + +mf=pbcdft.RKS(cell) +#mf.xc = "LDA, VWN" +mf.xc = "PBE,PBE" +mf.init_guess = 'atom' # atom guess is fast +mf.with_df = multigrid.MultiGridFFTDF2(cell) +mf.with_df.ngrids = 4 # number of sets of grid points +mf.kernel() + +# Nuclear Gradients +from pyscf.pbc.grad import rks as rks_grad +grad = rks_grad.Gradients(mf) +g = grad.kernel() diff --git a/pyscf/gto/mole.py b/pyscf/gto/mole.py index 4e06980ffb..28d8fd444d 100644 --- a/pyscf/gto/mole.py +++ b/pyscf/gto/mole.py @@ -61,6 +61,7 @@ NUC_MOD_OF = 2 PTR_ZETA = 3 PTR_FRAC_CHARGE = 4 +PTR_RADIUS = 5 ATM_SLOTS = 6 ATOM_OF = 0 ANG_OF = 1 @@ -2412,6 +2413,15 @@ def ms(self, x): else: self.spin = int(round(2*x, 4)) + @property + def enuc(self): + '''nuclear repulsion energy''' + if self._enuc is None: + self._enuc = self.energy_nuc() + return self._enuc + @enuc.setter + def enuc(self, enuc): + self._enuc = 
enuc copy = copy @@ -2576,6 +2586,9 @@ def build(self, dump_input=True, parse_arg=ARGPARSE, # number of electrons are consistent. self.nelec + # reset nuclear energy + self.enuc = None + if not self.magmom: self.magmom = [0,] * self.natm elif len(self.magmom) != self.natm: @@ -2784,7 +2797,7 @@ def dump_input(self): if self.verbose >= logger.INFO: self.stdout.write('\n') - logger.info(self, 'nuclear repulsion = %.15g', self.energy_nuc()) + logger.info(self, 'nuclear repulsion = %.15g', self.enuc) if self.symmetry: if self.topgroup == self.groupname: logger.info(self, 'point group symmetry = %s', self.topgroup) @@ -3050,6 +3063,9 @@ def set_geom_(self, atoms_or_coords, unit=None, symmetry=None, mol.symmetry = symmetry mol.build(False, False) + # reset nuclear energy + mol.enuc = None + if mol.verbose >= logger.INFO: logger.info(mol, 'New geometry') for ia, atom in enumerate(mol._atom): @@ -3542,7 +3558,9 @@ def intor_by_shell(self, intor, shells, comp=None, grids=None): eval_ao = eval_gto = eval_gto - energy_nuc = get_enuc = energy_nuc + energy_nuc = energy_nuc + def get_enuc(self): + return self.enuc def get_ao_indices(self, bas_list, ao_loc=None): ''' diff --git a/pyscf/gto/moleintor.py b/pyscf/gto/moleintor.py index e3d661f1e0..4c6a4ce8cf 100644 --- a/pyscf/gto/moleintor.py +++ b/pyscf/gto/moleintor.py @@ -429,6 +429,7 @@ def _get_intor_and_comp(intor_name, comp=None): 'int2c2e_ip1ip2' : (9, 9), 'int2c2e_ipip1' : (9, 9), 'int3c1e' : (1, 1), + 'int3c1e_ip1' : (3, 3), 'int3c1e_p2' : (1, 1), 'int3c1e_iprinv' : (3, 3), 'int2c2e' : (1, 1), diff --git a/pyscf/lib/CMakeLists.txt b/pyscf/lib/CMakeLists.txt index 1dc076da21..4b7236535e 100644 --- a/pyscf/lib/CMakeLists.txt +++ b/pyscf/lib/CMakeLists.txt @@ -136,6 +136,9 @@ else () set(CMAKE_INSTALL_RPATH "\$ORIGIN:\$ORIGIN/deps/lib:\$ORIGIN/deps/lib64") endif () +option(ENABLE_FFTW "Using fftw3" OFF) +option(BUILD_FFTW "Building fftw3" OFF) + add_subdirectory(np_helper) add_subdirectory(gto) add_subdirectory(vhf) @@ 
-198,6 +201,12 @@ option(ENABLE_XCFUN "Using xcfun for XC functional library" ON) option(BUILD_LIBXC "Download and build libxc library" ON) option(BUILD_XCFUN "Download and build xcfun library" ON) +option(ENABLE_LIBXSMM "Using libxsmm" OFF) +option(BUILD_LIBXSMM "Building libxsmm" OFF) +if(APPLE) + set(ENABLE_LIBXSMM OFF) +endif() + if(NOT DISABLE_DFT) add_subdirectory(dft) @@ -237,8 +246,39 @@ if(ENABLE_XCFUN AND BUILD_XCFUN) add_dependencies(xcfun_itrf libxcfun) add_dependencies(dft libxcfun) endif() # ENABLE_XCFUN + +if(ENABLE_LIBXSMM AND BUILD_LIBXSMM) + if(NOT EXISTS "${PROJECT_SOURCE_DIR}/deps/include/libxsmm.h") + ExternalProject_Add(libxsmm + GIT_REPOSITORY https://github.com/hfp/libxsmm.git + GIT_TAG 1.17 + PREFIX ${PROJECT_BINARY_DIR}/deps + INSTALL_DIR ${PROJECT_SOURCE_DIR}/deps + CONFIGURE_COMMAND "" + BUILD_IN_SOURCE True + BUILD_COMMAND make -j4 PREFIX= CXX=${CMAKE_CXX_COMPILER} CC=${CMAKE_C_COMPILER} STATIC=0 MALLOC=0 INTRINSICS=2 install + INSTALL_COMMAND "" + ) + add_dependencies(dft libxsmm) + endif() +endif() endif() # DISABLE_DFT +if(ENABLE_FFTW AND BUILD_FFTW) +# if(NOT EXISTS "${PROJECT_SOURCE_DIR}/deps/include/fftw3.h") + ExternalProject_Add(libfftw3 + URL https://www.fftw.org/fftw-3.3.10.tar.gz + PREFIX ${PROJECT_BINARY_DIR}/deps + INSTALL_DIR ${PROJECT_SOURCE_DIR}/deps + BUILD_IN_SOURCE True + CONFIGURE_COMMAND ./configure --enable-static=no --enable-shared=yes --enable-threads CXX=${CMAKE_CXX_COMPILER} CC=${CMAKE_C_COMPILER} prefix= + BUILD_COMMAND make -j4 install + ) + add_dependencies(fft libfftw3) + add_dependencies(pbc libfftw3) +# endif() +endif() + if(EXISTS "${PROJECT_SOURCE_DIR}/cmake.user.inc") include("${PROJECT_SOURCE_DIR}/cmake.user.inc") endif() diff --git a/pyscf/lib/dft/CMakeLists.txt b/pyscf/lib/dft/CMakeLists.txt index 6b01b7eca0..c7263183c8 100644 --- a/pyscf/lib/dft/CMakeLists.txt +++ b/pyscf/lib/dft/CMakeLists.txt @@ -15,14 +15,19 @@ add_library(dft SHARED CxLebedevGrid.c grid_basis.c nr_numint.c r_numint.c 
numint_uniform_grid.c xc_deriv.c nr_numint_sparse.c - ) -add_dependencies(dft cgto cvhf np_helper) + multigrid.c grid_common.c grid_collocate.c grid_integrate.c utils.c +) +add_dependencies(dft cgto cvhf np_helper pbc) set_target_properties(dft PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}) -target_link_libraries(dft cvhf cgto cint np_helper ${BLAS_LIBRARIES} ${OPENMP_C_PROPERTIES}) - +if(ENABLE_LIBXSMM) + add_definitions(-DHAVE_LIBXSMM) + target_link_libraries(dft cvhf cgto cint np_helper pbc xsmm ${BLAS_LIBRARIES} ${OPENMP_C_PROPERTIES}) +else() + target_link_libraries(dft cvhf cgto cint np_helper pbc ${BLAS_LIBRARIES} ${OPENMP_C_PROPERTIES}) +endif() if(ENABLE_LIBXC) add_library(xc_itrf SHARED libxc_itrf.c) @@ -37,4 +42,3 @@ set_target_properties(xcfun_itrf PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}) target_link_libraries(xcfun_itrf xcfun ${OPENMP_C_PROPERTIES}) endif() - diff --git a/pyscf/lib/dft/grid_collocate.c b/pyscf/lib/dft/grid_collocate.c new file mode 100644 index 0000000000..33842191d3 --- /dev/null +++ b/pyscf/lib/dft/grid_collocate.c @@ -0,0 +1,655 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include "config.h" +#include "vhf/fblas.h" +#include "np_helper/np_helper.h" +#include "dft/multigrid.h" +#include "dft/grid_common.h" +#include "dft/utils.h" + +#define MAX_THREADS 256 +#define PTR_RADIUS 5 + +static void transform_dm(double* dm_cart, double* dm, + double* ish_contr_coeff, double* jsh_contr_coeff, + int* ish_ao_loc, int* jsh_ao_loc, + int* ish_bas, int* jsh_bas, int ish, int jsh, + int ish0, int jsh0, int naoj, double* cache) +{ + int i0 = ish_ao_loc[ish] - ish_ao_loc[ish0]; + int i1 = ish_ao_loc[ish+1] - ish_ao_loc[ish0]; + int j0 = jsh_ao_loc[jsh] - jsh_ao_loc[jsh0]; + int j1 = jsh_ao_loc[jsh+1] - jsh_ao_loc[jsh0]; + + int nrow = i1 - i0; + int ncol = j1 - j0; + double* pdm = dm + ((size_t)naoj) * i0 + j0; + + int l_i = ish_bas[ANG_OF+ish*BAS_SLOTS]; + int ncart_i = _LEN_CART[l_i]; + int nprim_i = ish_bas[NPRIM_OF+ish*BAS_SLOTS]; + int nao_i = nprim_i*ncart_i; + int l_j = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + int ncart_j = _LEN_CART[l_j]; + int nprim_j = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS]; + int nao_j = nprim_j*ncart_j; + + const char TRANS_T = 'T'; + const char TRANS_N = 'N'; + const double D1 = 1; + const double D0 = 0; + //einsum("pi,ij,qj->pq", coeff_i, dm, coeff_j) + dgemm_wrapper(TRANS_T, TRANS_N, nao_j, nrow, ncol, + D1, jsh_contr_coeff, ncol, pdm, naoj, D0, cache, nao_j); + dgemm_wrapper(TRANS_N, TRANS_N, nao_j, nao_i, nrow, + D1, cache, nao_j, ish_contr_coeff, nrow, D0, dm_cart, nao_j); +} + + +static void add_rho_submesh(double* rho, double* pqr, + int* mesh_lb, int* mesh_ub, int* submesh_lb, + const int* mesh, const int* submesh) +{ + const int x0 = mesh_lb[0]; + const int y0 = mesh_lb[1]; + const int z0 = mesh_lb[2]; + + const int nx = mesh_ub[0] - x0; + const int ny = mesh_ub[1] - y0; + const int nz = mesh_ub[2] - z0; + + const int x0_sub = submesh_lb[0]; + const int y0_sub = submesh_lb[1]; + const int z0_sub = submesh_lb[2]; + + 
const size_t mesh_yz = ((size_t) mesh[1]) * mesh[2]; + const size_t submesh_yz = ((size_t) submesh[1]) * submesh[2]; + + int ix, iy, iz; + for (ix = 0; ix < nx; ix++) { + double* __restrict ptr_rho = rho + (ix + x0) * mesh_yz + y0 * mesh[2] + z0; + double* __restrict ptr_pqr = pqr + (ix + x0_sub) * submesh_yz + y0_sub * submesh[2] + z0_sub; + for (iy = 0; iy < ny; iy++) { + #pragma omp simd + for (iz = 0; iz < nz; iz++) { + ptr_rho[iz] += ptr_pqr[iz]; + } + ptr_rho += mesh[2]; + ptr_pqr += submesh[2]; + } + } +} + + +static void _orth_rho(double *rho, double *dm_xyz, + double fac, int topl, + int *mesh, int *grid_slice, + double *xs_exp, double *ys_exp, double *zs_exp, + double *cache) +{ + const int l1 = topl + 1; + const int l1l1 = l1 * l1; + const int nx0 = grid_slice[0]; + const int nx1 = grid_slice[1]; + const int ny0 = grid_slice[2]; + const int ny1 = grid_slice[3]; + const int nz0 = grid_slice[4]; + const int nz1 = grid_slice[5]; + const int ngridx = nx1 - nx0; + const int ngridy = ny1 - ny0; + const int ngridz = nz1 - nz0; + if (ngridx == 0 || ngridy == 0 || ngridz == 0) { + return; + } + + const char TRANS_N = 'N'; + const char TRANS_T = 'T'; + const double D0 = 0; + const double D1 = 1; + const int xcols = ngridy * ngridz; + double *xyr = cache; + double *xqr = xyr + l1l1 * ngridz; + double *pqr = xqr + l1 * xcols; + int ix, iy, iz, l; + + dgemm_wrapper(TRANS_N, TRANS_N, ngridz, l1l1, l1, + fac, zs_exp, ngridz, dm_xyz, l1, + D0, xyr, ngridz); + for (l = 0; l <= topl; l++) { + dgemm_wrapper(TRANS_N, TRANS_T, ngridz, ngridy, l1, + D1, xyr+l*l1*ngridz, ngridz, ys_exp, ngridy, + D0, xqr+l*xcols, ngridz); + } + dgemm_wrapper(TRANS_N, TRANS_T, xcols, ngridx, l1, + D1, xqr, xcols, xs_exp, ngridx, + D0, pqr, xcols); + + const int submesh[3] = {ngridx, ngridy, ngridz}; + int lb[3], ub[3]; + for (ix = 0; ix < ngridx;) { + lb[0] = modulo(ix + nx0, mesh[0]); + ub[0] = get_upper_bound(lb[0], mesh[0], ix, ngridx); + for (iy = 0; iy < ngridy;) { + lb[1] = modulo(iy + 
ny0, mesh[1]); + ub[1] = get_upper_bound(lb[1], mesh[1], iy, ngridy); + for (iz = 0; iz < ngridz;) { + lb[2] = modulo(iz + nz0, mesh[2]); + ub[2] = get_upper_bound(lb[2], mesh[2], iz, ngridz); + int lb_sub[3] = {ix, iy, iz}; + add_rho_submesh(rho, pqr, lb, ub, lb_sub, mesh, submesh); + iz += ub[2] - lb[2]; + } + iy += ub[1] - lb[1]; + } + ix += ub[0] - lb[0]; + } +} + + +void make_rho_lda_orth(double *rho, double *dm, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int topl = li + lj; + int l1 = topl + 1; + int l1l1l1 = l1 * l1 * l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + + if (data_size == 0) { + return; + } + cache += data_size; + + double *dm_xyz = cache; + cache += l1l1l1; + memset(dm_xyz, 0, l1l1l1*sizeof(double)); + + _dm_to_dm_xyz(dm_xyz, dm, li, lj, ri, rj, cache); + + _orth_rho(rho, dm_xyz, fac, topl, mesh, grid_slice, + xs_exp, ys_exp, zs_exp, cache); +} + + +static void _apply_rho(void (*eval_rho)(), double *rho, double *dm, + PGFPair* pgfpair, int comp, int dimension, + double* dh, double *a, double *b, int *mesh, + double* ish_gto_norm, double* jsh_gto_norm, + int *ish_atm, int *ish_bas, double *ish_env, + int *jsh_atm, int *jsh_bas, double *jsh_env, + double* Ls, double *cache) +{ + int ish = pgfpair->ish; + int jsh = pgfpair->jsh; + int ipgf = pgfpair->ipgf; + int jpgf = pgfpair->jpgf; + int iL = pgfpair->iL; + double cutoff = pgfpair->radius; + + double *ri = ish_env + ish_atm[PTR_COORD+ish_bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS]; + double *rj = jsh_env + jsh_atm[PTR_COORD+jsh_bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS]; + double *rL = Ls + iL*3; + double rjL[3]; + rjL[0] = rj[0] + rL[0]; + rjL[1] = rj[1] + rL[1]; + rjL[2] = rj[2] + rL[2]; + + const int li = 
ish_bas[ANG_OF+ish*BAS_SLOTS]; + const int lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + double ai = ish_env[ish_bas[PTR_EXP+ish*BAS_SLOTS]+ipgf]; + double aj = jsh_env[jsh_bas[PTR_EXP+jsh*BAS_SLOTS]+jpgf]; + double ci = ish_gto_norm[ipgf]; + double cj = jsh_gto_norm[jpgf]; + double aij = ai + aj; + double rrij = CINTsquare_dist(ri, rjL); + double eij = (ai * aj / aij) * rrij; + if (eij > EIJCUTOFF) { + return; + } + double fac = exp(-eij) * ci * cj * CINTcommon_fac_sp(li) * CINTcommon_fac_sp(lj); + if (fac < ish_env[PTR_EXPDROP] && fac < jsh_env[PTR_EXPDROP]) { + return; + } + + (*eval_rho)(rho, dm, comp, li, lj, ai, aj, ri, rjL, + fac, cutoff, dimension, dh, a, b, mesh, cache); +} + + +static size_t _rho_cache_size(int l, int nprim, int nctr, int* mesh, double radius, double* dh) +{ + size_t size = 0; + size_t mesh_size = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + size_t nmx = get_max_num_grid_orth(dh, radius); + int l1 = 2 * l + 1; + int l1l1 = l1 * l1; + int max_mesh = MAX(MAX(mesh[0], mesh[1]), mesh[2]); + size += (nprim * _LEN_CART[l]) * (nprim * _LEN_CART[l]); // dm_cart + size += _LEN_CART[l]*_LEN_CART[l]; // dm_pgf + size += nctr * _LEN_CART[l] * nprim * _LEN_CART[l]; // transform_dm + size += l1 * (mesh[0] + mesh[1] + mesh[2]); // xs_exp, ys_exp, zs_exp + size += l1l1 * l1; // dm_xyz + size += 3 * (_LEN_CART[l] + l1); // _dm_to_dm_xyz + + size_t size_orth_components = l1 * nmx + nmx; // orth_components + size_t size_orth_rho = 0; // _orth_rho + if (nmx < max_mesh) { + size_orth_rho = l1l1*nmx + l1*nmx*nmx + nmx*nmx*nmx; + } else { + size_orth_rho = l1l1*mesh[2] + l1*mesh[1]*mesh[2] + mesh_size; + } + size += MAX(size_orth_rho, size_orth_components); + size += 1000000; + //printf("Memory allocated per thread for make_rho: %ld MB.\n", (size+mesh_size)*sizeof(double) / 1000000); + return size; +} + + +static size_t _rho_core_cache_size(int* mesh, double radius, double* dh) +{ + size_t size = 0; + size_t mesh_size = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + size_t nmx = 
get_max_num_grid_orth(dh, radius); + int l = 0; + int l1 = 1; + int l1l1 = l1 * l1; + int max_mesh = MAX(MAX(mesh[0], mesh[1]), mesh[2]); + size += l1 * (mesh[0] + mesh[1] + mesh[2]); + size += l1l1 * l1; + size += 3 * (_LEN_CART[l] + l1); + + size_t size_orth_components = l1 * nmx + nmx; + size_t size_orth_rho = 0; + if (nmx < max_mesh) { + size_orth_rho = l1l1*nmx + l1*nmx*nmx + nmx*nmx*nmx; + } else { + size_orth_rho = l1l1*mesh[2] + l1*mesh[1]*mesh[2] + mesh_size; + } + size += MAX(size_orth_rho, size_orth_components); + //size += 1000000; + return size; +} + + +void grid_collocate_drv(void (*eval_rho)(), RS_Grid** rs_rho, double* dm, TaskList** task_list, + int comp, int hermi, int *shls_slice, int* ish_ao_loc, int* jsh_ao_loc, + int dimension, double* Ls, double* a, double* b, + int* ish_atm, int* ish_bas, double* ish_env, + int* jsh_atm, int* jsh_bas, double* jsh_env, int cart) +{ + TaskList* tl = *task_list; + GridLevel_Info* gridlevel_info = tl->gridlevel_info; + int nlevels = gridlevel_info->nlevels; + + assert (comp == (*rs_rho)->comp); + + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + //const int nijsh = nish * njsh; + //const int naoi = ish_ao_loc[ish1] - ish_ao_loc[ish0]; + const int naoj = jsh_ao_loc[jsh1] - jsh_ao_loc[jsh0]; + + double **gto_norm_i = (double**) malloc(sizeof(double*) * nish); + double **cart2sph_coeff_i = (double**) malloc(sizeof(double*) * nish); + get_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1, ish_bas, ish_env, cart); + double **gto_norm_j = gto_norm_i; + double **cart2sph_coeff_j = cart2sph_coeff_i; + if (hermi != 1) { + gto_norm_j = (double**) malloc(sizeof(double*) * njsh); + cart2sph_coeff_j = (double**) malloc(sizeof(double*) * njsh); + get_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1, jsh_bas, jsh_env, cart); + } + + int ish_lmax = get_lmax(ish0, 
ish1, ish_bas); + int jsh_lmax = ish_lmax; + if (hermi != 1) { + jsh_lmax = get_lmax(jsh0, jsh1, jsh_bas); + } + + int ish_nprim_max = get_nprim_max(ish0, ish1, ish_bas); + int jsh_nprim_max = ish_nprim_max; + if (hermi != 1) { + jsh_nprim_max = get_nprim_max(jsh0, jsh1, jsh_bas); + } + + int ish_nctr_max = get_nctr_max(ish0, ish1, ish_bas); + int jsh_nctr_max = ish_nctr_max; + if (hermi != 1) { + jsh_nctr_max = get_nctr_max(jsh0, jsh1, jsh_bas); + } + + int ilevel; + int *mesh; + double max_radius; + double *rho, *rhobufs[MAX_THREADS]; + Task* task; + size_t ntasks; + PGFPair** pgfpairs; + for (ilevel = 0; ilevel < nlevels; ilevel++) { + task = (tl->tasks)[ilevel]; + ntasks = task->ntasks; + if (ntasks <= 0) { + continue; + } + pgfpairs = task->pgfpairs; + max_radius = task->radius; + + rho = (*rs_rho)->data[ilevel]; + mesh = gridlevel_info->mesh + ilevel*3; + + double dh[9]; + get_grid_spacing(dh, a, mesh); + + int *task_loc; + int nblock = get_task_loc(&task_loc, pgfpairs, ntasks, ish0, ish1, jsh0, jsh1, hermi); + + size_t cache_size = _rho_cache_size(MAX(ish_lmax,jsh_lmax), + MAX(ish_nprim_max, jsh_nprim_max), + MAX(ish_nctr_max, jsh_nctr_max), mesh, max_radius, dh); + size_t ngrids = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + +#pragma omp parallel +{ + PGFPair *pgfpair = NULL; + int iblock, itask, ish, jsh; + double *ptr_gto_norm_i, *ptr_gto_norm_j; + double *cache0 = malloc(sizeof(double) * cache_size); + double *dm_cart = cache0; + double *dm_pgf = cache0 + ish_nprim_max*_LEN_CART[ish_lmax]*jsh_nprim_max*_LEN_CART[jsh_lmax]; + double *cache = dm_pgf + _LEN_CART[ish_lmax]*_LEN_CART[jsh_lmax]; + + int thread_id = omp_get_thread_num(); + double *rho_priv; + if (thread_id == 0) { + rho_priv = rho; + } else { + rho_priv = calloc(comp*ngrids, sizeof(double)); + } + rhobufs[thread_id] = rho_priv; + + #pragma omp for schedule(dynamic) + for (iblock = 0; iblock < nblock; iblock+=2) { + itask = task_loc[iblock]; + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + jsh = 
pgfpair->jsh; + ptr_gto_norm_i = gto_norm_i[ish]; + ptr_gto_norm_j = gto_norm_j[jsh]; + transform_dm(dm_cart, dm, cart2sph_coeff_i[ish], + cart2sph_coeff_j[jsh], ish_ao_loc, jsh_ao_loc, + ish_bas, jsh_bas, ish, jsh, ish0, jsh0, naoj, cache); + for (; itask < task_loc[iblock+1]; itask++) { + pgfpair = pgfpairs[itask]; + get_dm_pgfpair(dm_pgf, dm_cart, pgfpair, ish_bas, jsh_bas, hermi); + _apply_rho(eval_rho, rho_priv, dm_pgf, pgfpair, comp, dimension, dh, a, b, mesh, + ptr_gto_norm_i, ptr_gto_norm_j, ish_atm, ish_bas, ish_env, + jsh_atm, jsh_bas, jsh_env, Ls, cache); + } + } + + free(cache0); + NPomp_dsum_reduce_inplace(rhobufs, comp*ngrids); + if (thread_id != 0) { + free(rho_priv); + } +} + if (task_loc) { + free(task_loc); + } + } // loop ilevel + + del_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1); + if (hermi != 1) { + del_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1); + } +} + + +void build_core_density(void (*eval_rho)(), double* rho, + int* atm, int* bas, int nbas, double* env, + int* mesh, int dimension, double* a, double* b, double max_radius) +{ + size_t ngrids; + ngrids = ((size_t) mesh[0]) * mesh[1] * mesh[2]; + + double dh[9]; + get_grid_spacing(dh, a, mesh); + + double *rhobufs[MAX_THREADS]; + size_t cache_size = _rho_core_cache_size(mesh, max_radius, dh); + +#pragma omp parallel +{ + int ia, ib; + double alpha, coeff, charge, rad, fac; + double dm[] = {1.0}; + double *r0; + double *cache = (double*) malloc(sizeof(double) * cache_size); + + int thread_id = omp_get_thread_num(); + double *rho_priv; + if (thread_id == 0) { + rho_priv = rho; + } else { + rho_priv = calloc(ngrids, sizeof(double)); + } + rhobufs[thread_id] = rho_priv; + + #pragma omp for schedule(static) + for (ib = 0; ib < nbas; ib++) { + ia = bas[ib*BAS_SLOTS+ATOM_OF]; + alpha = env[bas[ib*BAS_SLOTS+PTR_EXP]]; + coeff = env[bas[ib*BAS_SLOTS+PTR_COEFF]]; + charge = (double)atm[ia*ATM_SLOTS+CHARGE_OF]; + r0 = env + atm[ia*ATM_SLOTS+PTR_COORD]; + fac = -charge * coeff; 
+ rad = env[atm[ia*ATM_SLOTS+PTR_RADIUS]]; + eval_rho(rho_priv, dm, 1, 0, 0, alpha, 0., r0, r0, + fac, rad, dimension, dh, a, b, mesh, cache); + } + free(cache); + + NPomp_dsum_reduce_inplace(rhobufs, ngrids); + if (thread_id != 0) { + free(rho_priv); + } +} +} + + + + +static void make_pgfparis_orth( + PGFPair* pgfpair, int comp, int dimension, + double* dh, double *a, double *b, int *mesh, + double* ish_gto_norm, double* jsh_gto_norm, + int *ish_atm, int *ish_bas, double *ish_env, + int *jsh_atm, int *jsh_bas, double *jsh_env, + double* Ls, double *cache) +{ + int ish = pgfpair->ish; + int jsh = pgfpair->jsh; + int ipgf = pgfpair->ipgf; + int jpgf = pgfpair->jpgf; + int iL = pgfpair->iL; + double cutoff = pgfpair->radius; + + double *ri = ish_env + ish_atm[PTR_COORD+ish_bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS]; + double *rj = jsh_env + jsh_atm[PTR_COORD+jsh_bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS]; + double *rL = Ls + iL*3; + double rjL[3]; + rjL[0] = rj[0] + rL[0]; + rjL[1] = rj[1] + rL[1]; + rjL[2] = rj[2] + rL[2]; + + const int li = ish_bas[ANG_OF+ish*BAS_SLOTS]; + const int lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + double ai = ish_env[ish_bas[PTR_EXP+ish*BAS_SLOTS]+ipgf]; + double aj = jsh_env[jsh_bas[PTR_EXP+jsh*BAS_SLOTS]+jpgf]; + double ci = ish_gto_norm[ipgf]; + double cj = jsh_gto_norm[jpgf]; + double aij = ai + aj; + double rrij = CINTsquare_dist(ri, rjL); + double eij = (ai * aj / aij) * rrij; + if (eij > EIJCUTOFF) { + return; + } + double fac = exp(-eij) * ci * cj * CINTcommon_fac_sp(li) * CINTcommon_fac_sp(lj); + if (fac < ish_env[PTR_EXPDROP] && fac < jsh_env[PTR_EXPDROP]) { + return; + } + + int topl = li + lj; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); +} + + +void eval_pgfpairs(TaskList** task_list, + int comp, int hermi, int *shls_slice, int* ish_ao_loc, int* jsh_ao_loc, + int dimension, double* Ls, double* a, double* 
b, + int* ish_atm, int* ish_bas, double* ish_env, + int* jsh_atm, int* jsh_bas, double* jsh_env, int cart) +{ + TaskList* tl = *task_list; + GridLevel_Info* gridlevel_info = tl->gridlevel_info; + int nlevels = gridlevel_info->nlevels; + + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + //const int nijsh = nish * njsh; + //const int naoi = ish_ao_loc[ish1] - ish_ao_loc[ish0]; + //const int naoj = jsh_ao_loc[jsh1] - jsh_ao_loc[jsh0]; + + double **gto_norm_i = (double**) malloc(sizeof(double*) * nish); + double **cart2sph_coeff_i = (double**) malloc(sizeof(double*) * nish); + get_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1, ish_bas, ish_env, cart); + double **gto_norm_j = gto_norm_i; + double **cart2sph_coeff_j = cart2sph_coeff_i; + if (hermi != 1) { + gto_norm_j = (double**) malloc(sizeof(double*) * njsh); + cart2sph_coeff_j = (double**) malloc(sizeof(double*) * njsh); + get_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1, jsh_bas, jsh_env, cart); + } + + int ish_lmax = get_lmax(ish0, ish1, ish_bas); + int jsh_lmax = ish_lmax; + if (hermi != 1) { + jsh_lmax = get_lmax(jsh0, jsh1, jsh_bas); + } + + int ish_nprim_max = get_nprim_max(ish0, ish1, ish_bas); + int jsh_nprim_max = ish_nprim_max; + if (hermi != 1) { + jsh_nprim_max = get_nprim_max(jsh0, jsh1, jsh_bas); + } + + int ish_nctr_max = get_nctr_max(ish0, ish1, ish_bas); + int jsh_nctr_max = ish_nctr_max; + if (hermi != 1) { + jsh_nctr_max = get_nctr_max(jsh0, jsh1, jsh_bas); + } + + int ilevel; + int *mesh; + double max_radius; + Task* task; + size_t ntasks; + PGFPair** pgfpairs; + for (ilevel = 0; ilevel < nlevels; ilevel++) { + task = (tl->tasks)[ilevel]; + ntasks = task->ntasks; + if (ntasks <= 0) { + continue; + } + pgfpairs = task->pgfpairs; + max_radius = task->radius; + + mesh = gridlevel_info->mesh + ilevel*3; + + double dh[9]; + 
get_grid_spacing(dh, a, mesh); + + int *task_loc; + int nblock = get_task_loc(&task_loc, pgfpairs, ntasks, ish0, ish1, jsh0, jsh1, hermi); + + size_t cache_size = _rho_cache_size(MAX(ish_lmax,jsh_lmax), + MAX(ish_nprim_max, jsh_nprim_max), + MAX(ish_nctr_max, jsh_nctr_max), mesh, max_radius, dh); + //size_t ngrids = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + +#pragma omp parallel +{ + PGFPair *pgfpair = NULL; + int iblock, itask, ish, jsh; + double *ptr_gto_norm_i, *ptr_gto_norm_j; + double *cache = malloc(sizeof(double) * cache_size); + + #pragma omp for schedule(dynamic) + for (iblock = 0; iblock < nblock; iblock+=2) { + itask = task_loc[iblock]; + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + jsh = pgfpair->jsh; + ptr_gto_norm_i = gto_norm_i[ish]; + ptr_gto_norm_j = gto_norm_j[jsh]; + for (; itask < task_loc[iblock+1]; itask++) { + pgfpair = pgfpairs[itask]; + make_pgfparis_orth(pgfpair, comp, dimension, dh, a, b, mesh, + ptr_gto_norm_i, ptr_gto_norm_j, ish_atm, ish_bas, ish_env, + jsh_atm, jsh_bas, jsh_env, Ls, cache); + } + } + + free(cache); +} + if (task_loc) { + free(task_loc); + } + } // loop ilevel + + del_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1); + if (hermi != 1) { + del_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1); + } +} diff --git a/pyscf/lib/dft/grid_common.c b/pyscf/lib/dft/grid_common.c new file mode 100644 index 0000000000..f7e198ab17 --- /dev/null +++ b/pyscf/lib/dft/grid_common.c @@ -0,0 +1,660 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include +#include "config.h" +#include "cint.h" +#include "vhf/fblas.h" +#include "np_helper/np_helper.h" +#include "dft/multigrid.h" +#include "dft/grid_common.h" + +#define EXPMIN -700 + + +int get_lmax(int ish0, int ish1, int* bas) +{ + int lmax = 0; + int ish; + for (ish = ish0; ish < ish1; ish++) { + lmax = MAX(lmax, bas[ANG_OF+ish*BAS_SLOTS]); + } + return lmax; +} + + +int get_nprim_max(int ish0, int ish1, int* bas) +{ + int nprim_max = 1; + int ish; + for (ish = ish0; ish < ish1; ish++) { + nprim_max = MAX(nprim_max, bas[NPRIM_OF+ish*BAS_SLOTS]); + } + return nprim_max; +} + + +int get_nctr_max(int ish0, int ish1, int* bas) +{ + int nctr_max = 1; + int ish; + for (ish = ish0; ish < ish1; ish++) { + nctr_max = MAX(nctr_max, bas[NCTR_OF+ish*BAS_SLOTS]); + } + return nctr_max; +} + + +void get_cart2sph_coeff(double** contr_coeff, double** gto_norm, + int ish0, int ish1, int* bas, double* env, int cart) +{ + int l; + int lmax = get_lmax(ish0, ish1, bas); + int nprim, ncart, nsph, nctr; + int ptr_exp, ptr_coeff; + int ish, ipgf, ic, i, j; + + double **c2s = (double**) malloc(sizeof(double*) * (lmax+1)); + for (l = 0; l <= lmax; l++) { + ncart = _LEN_CART[l]; + if (l <= 1 || cart == 1) { + c2s[l] = (double*) calloc(ncart*ncart, sizeof(double)); + for (i = 0; i < ncart; i++) { + c2s[l][i*ncart + i] = 1; + } + } + else { + nsph = 2*l + 1; + c2s[l] = (double*) calloc(nsph*ncart, sizeof(double)); + double* gcart = (double*) calloc(ncart*ncart, sizeof(double)); + for (i = 0; i < ncart; i++) { + gcart[i*ncart + i] = 1; + } + CINTc2s_ket_sph(c2s[l], ncart, gcart, l); + free(gcart); + } + } + +#pragma omp parallel private (ish, ipgf, ic, i, j, l,\ + ncart, nsph, nprim, nctr,\ + ptr_exp, ptr_coeff) +{ + #pragma omp for schedule(dynamic) + for (ish = ish0; ish < ish1; ish++) { + l = 
bas[ANG_OF+ish*BAS_SLOTS]; + ncart = _LEN_CART[l]; + nsph = cart == 1 ? ncart : 2*l+1; + nprim = bas[NPRIM_OF+ish*BAS_SLOTS]; + nctr = bas[NCTR_OF+ish*BAS_SLOTS]; + + ptr_exp = bas[PTR_EXP+ish*BAS_SLOTS]; + gto_norm[ish] = (double*) malloc(sizeof(double) * nprim); + for (ipgf = 0; ipgf < nprim; ipgf++) { + gto_norm[ish][ipgf] = CINTgto_norm(l, env[ptr_exp+ipgf]); + } + + ptr_coeff = bas[PTR_COEFF+ish*BAS_SLOTS]; + double *buf = (double*) calloc(nctr*nprim, sizeof(double)); + for (ipgf = 0; ipgf < nprim; ipgf++) { + double inv_norm = 1./gto_norm[ish][ipgf]; + daxpy_(&nctr, &inv_norm, env+ptr_coeff+ipgf, &nprim, buf+ipgf, &nprim); + } + + contr_coeff[ish] = (double*) malloc(sizeof(double) * nprim*ncart*nctr*nsph); + double* ptr_contr_coeff = contr_coeff[ish]; + for (ipgf = 0; ipgf < nprim; ipgf++) { + for (i = 0; i < ncart; i++) { + for (ic = 0; ic < nctr; ic++) { + for (j = 0; j < nsph; j++) { + *ptr_contr_coeff = buf[ic*nprim+ipgf] * c2s[l][j*ncart+i]; + ptr_contr_coeff += 1; + } + } + } + } + free(buf); + } +} + + for (l = 0; l <= lmax; l++) { + free(c2s[l]); + } + free(c2s); +} + + +void del_cart2sph_coeff(double** contr_coeff, double** gto_norm, int ish0, int ish1) +{ + int ish; + for (ish = ish0; ish < ish1; ish++) { + if (contr_coeff[ish]) { + free(contr_coeff[ish]); + } + if (gto_norm[ish]) { + free(gto_norm[ish]); + } + } + free(contr_coeff); + free(gto_norm); +} + + +int get_max_num_grid_orth(double* dh, double radius) +{ + double dx = MIN(MIN(dh[0], dh[4]), dh[8]); + int ngrid = 2 * (int) ceil(radius / dx) + 1; + return ngrid; +} + + +void get_grid_spacing(double* dh, double* a, int* mesh) +{ + int i, j; + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; j++) { + dh[i*3+j] = a[i*3+j] / mesh[i]; + } + } +} + + +int orth_components(double *xs_exp, int* bounds, double dx, double radius, + double xi, double xj, double ai, double aj, + int nx_per_cell, int topl, double *cache) +{ + double aij = ai + aj; + double xij = (ai * xi + aj * xj) / aij; + int x0_latt = 
(int) floor((xij - radius) / dx); + int x1_latt = (int) ceil((xij + radius) / dx); + int xij_latt = rint(xij / dx); + xij_latt = MAX(xij_latt, x0_latt); + xij_latt = MIN(xij_latt, x1_latt); + bounds[0] = x0_latt; + bounds[1] = x1_latt; + int ngridx = x1_latt - x0_latt; + + double base_x = dx * xij_latt; + double x0xij = base_x - xij; + double _x0x0 = -aij * x0xij * x0xij; + if (_x0x0 < EXPMIN) { + return 0; + } + + double *gridx = cache; + double *xs_all = xs_exp; + if (ngridx >= nx_per_cell) { + xs_all = gridx + ngridx; + } + + double _dxdx = -aij * dx * dx; + double _x0dx = -2 * aij * x0xij * dx; + double exp_dxdx = exp(_dxdx); + double exp_2dxdx = exp_dxdx * exp_dxdx; + double exp_x0dx = exp(_x0dx + _dxdx); + double exp_x0x0 = exp(_x0x0); + + int i; + int istart = xij_latt - x0_latt; + for (i = istart; i < ngridx; i++) { + xs_all[i] = exp_x0x0; + exp_x0x0 *= exp_x0dx; + exp_x0dx *= exp_2dxdx; + } + + exp_x0dx = exp(_dxdx - _x0dx); + exp_x0x0 = exp(_x0x0); + for (i = istart-1; i >= 0; i--) { + exp_x0x0 *= exp_x0dx; + exp_x0dx *= exp_2dxdx; + xs_all[i] = exp_x0x0; + } + + if (topl > 0) { + double x0xi = x0_latt * dx - xi; + for (i = 0; i < ngridx; i++) { + gridx[i] = x0xi + i * dx; + } + int l; + double *px0; + for (l = 1; l <= topl; l++) { + px0 = xs_all + (l-1) * ngridx; + for (i = 0; i < ngridx; i++) { + px0[ngridx+i] = px0[i] * gridx[i]; + } + } + } + + // add up contributions from all images to the referece image + if (ngridx >= nx_per_cell) { + memset(xs_exp, 0, (topl+1)*nx_per_cell*sizeof(double)); + int ix, l, lb, ub, size_x; + for (ix = 0; ix < ngridx; ix++) { + lb = modulo(ix + x0_latt, nx_per_cell); + ub = get_upper_bound(lb, nx_per_cell, ix, ngridx); + size_x = ub - lb; + double* __restrict ptr_xs_exp = xs_exp + lb; + double* __restrict ptr_xs_all = xs_all + ix; + for (l = 0; l <= topl; l++) { + #pragma omp simd + for (i = 0; i < size_x; i++) { + ptr_xs_exp[i] += ptr_xs_all[i]; + } + ptr_xs_exp += nx_per_cell; + ptr_xs_all += ngridx; + } + ix += size_x 
- 1; + } + + bounds[0] = 0; + bounds[1] = nx_per_cell; + ngridx = nx_per_cell; + } + return ngridx; +} + + +int _orth_components(double *xs_exp, int *img_slice, int *grid_slice, + double a, double b, double cutoff, + double xi, double xj, double ai, double aj, + int periodic, int nx_per_cell, int topl, double *cache) +{ + double aij = ai + aj; + double xij = (ai * xi + aj * xj) / aij; + double heights_inv = b; + double xij_frac = xij * heights_inv; + double edge0 = xij_frac - cutoff * heights_inv; + double edge1 = xij_frac + cutoff * heights_inv; + + if (edge0 == edge1) { + return 0; + } + + int nimg0 = 0; + int nimg1 = 1; + if (periodic) { + nimg0 = (int) floor(edge0); + nimg1 = (int) ceil(edge1); + } + + int nimg = nimg1 - nimg0; + + int nmx0 = nimg0 * nx_per_cell; + int nmx1 = nimg1 * nx_per_cell; + int nmx = nmx1 - nmx0; + + int nx0 = (int) floor(edge0 * nx_per_cell); + int nx1 = (int) ceil(edge1 * nx_per_cell); + + int nx0_edge = nx0 - nmx0; + int nx1_edge = nx1 - nmx0; + + if (periodic) { + nx0 = nx0_edge % nx_per_cell; + nx1 = nx1_edge % nx_per_cell; + if (nx1 == 0) { + nx1 = nx_per_cell; + } + } + assert(nx0 == nx0_edge); + + img_slice[0] = nimg0; + img_slice[1] = nimg1; + grid_slice[0] = nx0; + grid_slice[1] = nx1; + + int ngridx = _num_grids_on_x(nimg, nx0, nx1, nx_per_cell); + if (ngridx == 0) { + return 0; + } + + int i, m, l; + double *px0; + + double *gridx = cache; + double *xs_all = cache + nmx; + if (nimg == 1) { + xs_all = xs_exp; + } + + int grid_close_to_xij = rint(xij_frac * nx_per_cell) - nmx0; + grid_close_to_xij = MIN(grid_close_to_xij, nx1_edge); + grid_close_to_xij = MAX(grid_close_to_xij, nx0_edge); + + double img0_x = a * nimg0; + double dx = a / nx_per_cell; + double base_x = img0_x + dx * grid_close_to_xij; + double x0xij = base_x - xij; + double _x0x0 = -aij * x0xij * x0xij; + if (_x0x0 < EXPMIN) { + return 0; + } + + double _dxdx = -aij * dx * dx; + double _x0dx = -2 * aij * x0xij * dx; + double exp_dxdx = exp(_dxdx); + double 
exp_2dxdx = exp_dxdx * exp_dxdx; + double exp_x0dx = exp(_x0dx + _dxdx); + double exp_x0x0 = exp(_x0x0); + + for (i = grid_close_to_xij; i < nx1_edge; i++) { + xs_all[i] = exp_x0x0; + exp_x0x0 *= exp_x0dx; + exp_x0dx *= exp_2dxdx; + } + + exp_x0dx = exp(_dxdx - _x0dx); + exp_x0x0 = exp(_x0x0); + for (i = grid_close_to_xij-1; i >= nx0_edge; i--) { + exp_x0x0 *= exp_x0dx; + exp_x0dx *= exp_2dxdx; + xs_all[i] = exp_x0x0; + } + + if (topl > 0) { + double x0xi = img0_x - xi; + for (i = nx0_edge; i < nx1_edge; i++) { + gridx[i] = x0xi + i * dx; + } + for (l = 1; l <= topl; l++) { + px0 = xs_all + (l-1) * nmx; + for (i = nx0_edge; i < nx1_edge; i++) { + px0[nmx+i] = px0[i] * gridx[i]; + } + } + } + + int idx1; + if (nimg > 1) { + for (l = 0; l <= topl; l++) { + px0 = xs_all + l * nmx; + for (i = nx0; i < nx_per_cell; i++) { + xs_exp[l*nx_per_cell+i] = px0[i]; + } + memset(xs_exp+l*nx_per_cell, 0, nx0*sizeof(double)); + for (m = 1; m < nimg; m++) { + px0 = xs_all + l * nmx + m*nx_per_cell; + idx1 = (m == nimg - 1) ? 
nx1 : nx_per_cell; + for (i = 0; i < idx1; i++) { + xs_exp[l*nx_per_cell+i] += px0[i]; + } + } + } + } + return ngridx; +} + + +int init_orth_data(double **xs_exp, double **ys_exp, double **zs_exp, + int *grid_slice, double* dh, int* mesh, int topl, double radius, + double ai, double aj, double *ri, double *rj, double *cache) +{ + int l1 = topl + 1; + *xs_exp = cache; + *ys_exp = *xs_exp + l1 * mesh[0]; + *zs_exp = *ys_exp + l1 * mesh[1]; + int data_size = l1 * (mesh[0] + mesh[1] + mesh[2]); + cache += data_size; + + int ngridx = orth_components(*xs_exp, grid_slice, dh[0], radius, + ri[0], rj[0], ai, aj, mesh[0], topl, cache); + if (ngridx == 0) { + return 0; + } + + int ngridy = orth_components(*ys_exp, grid_slice+2, dh[4], radius, + ri[1], rj[1], ai, aj, mesh[1], topl, cache); + if (ngridy == 0) { + return 0; + } + + int ngridz = orth_components(*zs_exp, grid_slice+4, dh[8], radius, + ri[2], rj[2], ai, aj, mesh[2], topl, cache); + if (ngridz == 0) { + return 0; + } + + return data_size; +} + + +int _init_orth_data(double **xs_exp, double **ys_exp, double **zs_exp, + int *img_slice, int *grid_slice, int *mesh, + int topl, int dimension, double cutoff, + double ai, double aj, double *ri, double *rj, + double *a, double *b, double *cache) +{ + int l1 = topl + 1; + *xs_exp = cache; + *ys_exp = *xs_exp + l1 * mesh[0]; + *zs_exp = *ys_exp + l1 * mesh[1]; + int data_size = l1 * (mesh[0] + mesh[1] + mesh[2]); + cache += data_size; + + int ngridx = _orth_components(*xs_exp, img_slice, grid_slice, + a[0], b[0], cutoff, ri[0], rj[0], ai, aj, + (dimension>=1), mesh[0], topl, cache); + if (ngridx == 0) { + return 0; + } + + int ngridy = _orth_components(*ys_exp, img_slice+2, grid_slice+2, + a[4], b[4], cutoff, ri[1], rj[1], ai, aj, + (dimension>=2), mesh[1], topl, cache); + if (ngridy == 0) { + return 0; + } + + int ngridz = _orth_components(*zs_exp, img_slice+4, grid_slice+4, + a[8], b[8], cutoff, ri[2], rj[2], ai, aj, + (dimension>=3), mesh[2], topl, cache); + if (ngridz == 
0) { + return 0; + } + + return data_size; +} + + +void _get_dm_to_dm_xyz_coeff(double* coeff, double* rij, int lmax, double* cache) +{ + int l1 = lmax + 1; + int l, lx; + + double *rx_pow = cache; + double *ry_pow = rx_pow + l1; + double *rz_pow = ry_pow + l1; + + rx_pow[0] = 1.0; + ry_pow[0] = 1.0; + rz_pow[0] = 1.0; + for (lx = 1; lx <= lmax; lx++) { + rx_pow[lx] = rx_pow[lx-1] * rij[0]; + ry_pow[lx] = ry_pow[lx-1] * rij[1]; + rz_pow[lx] = rz_pow[lx-1] * rij[2]; + } + + int dj = _LEN_CART[lmax]; + double *pcx = coeff; + double *pcy = pcx + dj; + double *pcz = pcy + dj; + for (l = 0; l <= lmax; l++){ + for (lx = 0; lx <= l; lx++) { + pcx[lx] = BINOMIAL(l, lx) * rx_pow[l-lx]; + pcy[lx] = BINOMIAL(l, lx) * ry_pow[l-lx]; + pcz[lx] = BINOMIAL(l, lx) * rz_pow[l-lx]; + } + pcx += l+1; + pcy += l+1; + pcz += l+1; + } +} + + +void _dm_to_dm_xyz(double* dm_xyz, double* dm, int li, int lj, double* ri, double* rj, double* cache) +{ + int lx, ly, lz; + int lx_i, ly_i, lz_i; + int lx_j, ly_j, lz_j; + int jx, jy, jz; + double rij[3]; + + rij[0] = ri[0] - rj[0]; + rij[1] = ri[1] - rj[1]; + rij[2] = ri[2] - rj[2]; + + int l1 = li + lj + 1; + int l1l1 = l1 * l1; + double *coeff = cache; + int dj = _LEN_CART[lj]; + cache += 3 * dj; + + _get_dm_to_dm_xyz_coeff(coeff, rij, lj, cache); + + double cx, cxy, cxyz; + double *pcx = coeff; + double *pcy = pcx + dj; + double *pcz = pcy + dj; + double *pdm = dm; + for (lx_i = li; lx_i >= 0; lx_i--) { + for (ly_i = li-lx_i; ly_i >= 0; ly_i--) { + lz_i = li - lx_i - ly_i; + for (lx_j = lj; lx_j >= 0; lx_j--) { + for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) { + lz_j = lj - lx_j - ly_j; + for (jx = 0; jx <= lx_j; jx++) { + cx = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cxy = cx * pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + for (jz = 0; jz <= lz_j; jz++) { + cxyz = cxy * pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + dm_xyz[lx*l1l1+ly*l1+lz] += cxyz * pdm[0]; + } + } + } + pdm += 1; + } + } + } + } +} + + +void 
_dm_xyz_to_dm(double* dm_xyz, double* dm, int li, int lj, double* ri, double* rj, double* cache) +{ + int lx, ly, lz; + int lx_i, ly_i, lz_i; + int lx_j, ly_j, lz_j; + int jx, jy, jz; + double rij[3]; + + rij[0] = ri[0] - rj[0]; + rij[1] = ri[1] - rj[1]; + rij[2] = ri[2] - rj[2]; + + int l1 = li + lj + 1; + int l1l1 = l1 * l1; + double *coeff = cache; + int dj = _LEN_CART[lj]; + cache += 3 * dj; + + _get_dm_to_dm_xyz_coeff(coeff, rij, lj, cache); + + double cx, cy, cz; + double *pcx = coeff; + double *pcy = pcx + dj; + double *pcz = pcy + dj; + double *pdm = dm; + for (lx_i = li; lx_i >= 0; lx_i--) { + for (ly_i = li-lx_i; ly_i >= 0; ly_i--) { + lz_i = li - lx_i - ly_i; + for (lx_j = lj; lx_j >= 0; lx_j--) { + for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) { + lz_j = lj - lx_j - ly_j; + for (jx = 0; jx <= lx_j; jx++) { + cx = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cy = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + for (jz = 0; jz <= lz_j; jz++) { + cz = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + pdm[0] += cx*cy*cz * dm_xyz[lx*l1l1+ly*l1+lz]; + } + } + } + pdm += 1; + } + } + } + } +} + + +void get_dm_pgfpair(double* dm_pgf, double* dm_cart, + PGFPair* pgfpair, int* ish_bas, int* jsh_bas, int hermi) +{ + int ish = pgfpair->ish; + int jsh = pgfpair->jsh; + int ipgf = pgfpair->ipgf; + int jpgf = pgfpair->jpgf; + + int li = ish_bas[ANG_OF+ish*BAS_SLOTS]; + int lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + int di = _LEN_CART[li]; + int dj = _LEN_CART[lj]; + + int nprim_j = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS]; + int ncol = nprim_j * dj; + double *pdm = dm_cart + (ipgf*di*ncol + jpgf*dj); + double *pdm_pgf = dm_pgf; + int i, j; + for (i = 0; i < di; i++) { + for (j = 0; j < dj; j++) { + pdm_pgf[j] = pdm[j]; + } + pdm_pgf += dj; + pdm += ncol; + } + + /* + if (hermi == 1 && ish == jsh) { + assert(di == dj); + for (i = 0; i < di; i++) { + for (j = i+1; j < dj; j++) { + dm_pgf[i*dj+j] *= 2; + dm_pgf[j*dj+i] = 0; + } + } + }*/ + if (hermi == 1 && ish 
/* Nonzero when nx0 does not exceed nx1.  nx_per_cell is accepted but unused. */
static inline int _has_overlap(int nx0, int nx1, int nx_per_cell)
{
    return !(nx0 > nx1);
}

/*
 * Number of grid points along one axis:
 *   - a single image              -> nx1 - nx0
 *   - two images with nx0 > nx1   -> nx1 - nx0 + nx_per_cell
 *   - anything else               -> nx_per_cell
 */
static inline int _num_grids_on_x(int nimgx, int nx0, int nx1, int nx_per_cell)
{
    if (nimgx == 1) {
        return nx1 - nx0;
    }
    if (nimgx == 2 && !_has_overlap(nx0, nx1, nx_per_cell)) {
        return nx1 - nx0 + nx_per_cell;
    }
    return nx_per_cell;
}


/*
 * Fill xmap[0..ngridx) with grid indices.
 *   - nimgx == 1  : xmap[k] = nx0 + k
 *   - is_x_split  : identity for k < nx1, then k + (nx0 - nx1) for
 *                   nx1 <= k < ngridx
 *   - otherwise   : identity
 */
static inline void _get_grid_mapping(int* xmap, int nx0, int nx1, int ngridx, int nimgx, bool is_x_split)
{
    int k = 0;

    if (nimgx == 1) {
        for (; k < ngridx; k++) {
            xmap[k] = nx0 + k;
        }
        return;
    }

    if (is_x_split) {
        const int shift = nx0 - nx1;
        while (k < nx1) {
            xmap[k] = k;
            k++;
        }
        for (k = nx1; k < ngridx; k++) {
            xmap[k] = k + shift;
        }
        return;
    }

    while (k < ngridx) {
        xmap[k] = k;
        k++;
    }
}


/* Remainder of i by n, shifted by n and reduced again. */
static inline int modulo(int i, int n)
{
    const int shifted = i % n + n;
    return shifted % n;
}


/* x0 plus the smaller of (nx_per_cell - x0) and (ngridx - ix). */
static inline int get_upper_bound(int x0, int nx_per_cell, int ix, int ngridx)
{
    const int room_in_cell = nx_per_cell - x0;
    const int points_left = ngridx - ix;
    return x0 + (room_in_cell < points_left ? room_in_cell : points_left);
}
+1,1358 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include "config.h" +#include "vhf/fblas.h" +#include "np_helper/np_helper.h" +#include "dft/multigrid.h" +#include "dft/grid_common.h" +#include "dft/utils.h" + +#define PTR_RADIUS 5 + + +void transform_dm_inverse(double* dm_cart, double* dm, int comp, + double* ish_contr_coeff, double* jsh_contr_coeff, + int* ish_ao_loc, int* jsh_ao_loc, + int* ish_bas, int* jsh_bas, int ish, int jsh, + int ish0, int jsh0, int naoi, int naoj, double* cache) +{ + int i0 = ish_ao_loc[ish] - ish_ao_loc[ish0]; + int i1 = ish_ao_loc[ish+1] - ish_ao_loc[ish0]; + int j0 = jsh_ao_loc[jsh] - jsh_ao_loc[jsh0]; + int j1 = jsh_ao_loc[jsh+1] - jsh_ao_loc[jsh0]; + + int nrow = i1 - i0; + int ncol = j1 - j0; + double* pdm = dm + ((size_t)naoj) * i0 + j0; + + int l_i = ish_bas[ANG_OF+ish*BAS_SLOTS]; + int ncart_i = _LEN_CART[l_i]; + int nprim_i = ish_bas[NPRIM_OF+ish*BAS_SLOTS]; + int nao_i = nprim_i*ncart_i; + int l_j = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + int ncart_j = _LEN_CART[l_j]; + int nprim_j = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS]; + int nao_j = nprim_j*ncart_j; + + const char TRANS_T = 'T'; + const char TRANS_N = 'N'; + const double D1 = 1; + const double D0 = 0; + double *buf = cache; + + int ic; + for (ic = 0; ic < comp; ic++) { + //einsum("pi,pq,qj->ij", coeff_i, dm_cart, coeff_j) + dgemm_(&TRANS_N, &TRANS_N, &ncol, 
/*
 * Copy the (ish, jsh) block of each of the `comp` matrices in `mat` into the
 * transposed position, i.e. element (j, i) receives element (i, j).
 * Rows are addressed with stride naoj; consecutive components are
 * naoi*naoj elements apart.  Offsets are taken relative to shells
 * ish0 / jsh0 via the ao_loc tables.
 */
static void fill_tril(double* mat, int comp, int* ish_ao_loc, int* jsh_ao_loc,
                      int ish, int jsh, int ish0, int jsh0, int naoi, int naoj)
{
    const int row_begin = ish_ao_loc[ish] - ish_ao_loc[ish0];
    const int row_end = ish_ao_loc[ish+1] - ish_ao_loc[ish0];
    const int col_begin = jsh_ao_loc[jsh] - jsh_ao_loc[jsh0];
    const int col_end = jsh_ao_loc[jsh+1] - jsh_ao_loc[jsh0];
    const int nrow = row_end - row_begin;
    const int ncol = col_end - col_begin;
    const size_t comp_stride = ((size_t)naoi) * naoj;

    double *upper = mat + row_begin*((size_t)naoj) + col_begin;
    double *lower = mat + col_begin*((size_t)naoj) + row_begin;

    int c, r, k;
    for (c = 0; c < comp; c++, upper += comp_stride, lower += comp_stride) {
        for (r = 0; r < nrow; r++) {
            for (k = 0; k < ncol; k++) {
                lower[k*naoj+r] = upper[r*naoj+k];
            }
        }
    }
}
/*
 * Integrate `weights` over the box described by grid_slice
 * ({x0, x1, y0, y1, z0, z1}) and ADD the result to `out` (out is accumulated,
 * not overwritten).  The box is walked in pieces: along each axis the start
 * index is folded into [0, mesh[d]) with modulo(), the piece ends at
 * get_upper_bound(), and each piece is handed to integrate_submesh().
 * Returns immediately when any axis has zero extent.
 */
static void _orth_ints(double *out, double *weights, int topl, double fac,
                       double *xs_exp, double *ys_exp, double *zs_exp,
                       int *grid_slice, int *mesh, double *cache)
{
    int start[3], extent[3];
    int d;
    for (d = 0; d < 3; d++) {
        start[d] = grid_slice[2*d];
        extent[d] = grid_slice[2*d+1] - start[d];
    }
    if (extent[0] == 0 || extent[1] == 0 || extent[2] == 0) {
        return;
    }

    /* lo/hi: bounds of the current piece inside the mesh;
     * off: position of the piece inside the box. */
    int lo[3], hi[3], off[3];

    off[0] = 0;
    while (off[0] < extent[0]) {
        lo[0] = modulo(off[0] + start[0], mesh[0]);
        hi[0] = get_upper_bound(lo[0], mesh[0], off[0], extent[0]);

        off[1] = 0;
        while (off[1] < extent[1]) {
            lo[1] = modulo(off[1] + start[1], mesh[1]);
            hi[1] = get_upper_bound(lo[1], mesh[1], off[1], extent[1]);

            off[2] = 0;
            while (off[2] < extent[2]) {
                lo[2] = modulo(off[2] + start[2], mesh[2]);
                hi[2] = get_upper_bound(lo[2], mesh[2], off[2], extent[2]);

                integrate_submesh(out, weights, xs_exp, ys_exp, zs_exp, fac, topl,
                                  lo, hi, off, mesh, extent, cache);
                off[2] += hi[2] - lo[2];
            }
            off[1] += hi[1] - lo[1];
        }
        off[0] += hi[0] - lo[0];
    }
}
/*
 * Kernels for _v1_xyz_to_v1: each expands VRHO_LOOP_IP1 with a different axis
 * in the macro's X slot and adds its result to pv1[0].
 *
 * For the X-slot axis the macro adds two groups of terms read from
 * v1_xyz[lx*l1l1 + ly*l1 + lz]:
 *   - power l_i - 1 on that axis, weighted by l_i (only when l_i > 0);
 *   - power l_i + 1 on that axis, weighted by -2*ai.
 * The other two axes use power l_i + j with the plain pc? coefficients.
 * `aj` is part of the shared kernel signature but is not referenced by
 * VRHO_LOOP_IP1.
 */

/* x axis in the X slot. */
static void _vrho_loop_ip1_x(double* pv1, double* v1_xyz,
                             double* pcx, double* pcy, double* pcz,
                             double ai, double aj,
                             int lx_i, int ly_i, int lz_i,
                             int lx_j, int ly_j, int lz_j, int l1, int l1l1)
{
    VRHO_LOOP_IP1(x,y,z);
}


/* y axis in the X slot. */
static void _vrho_loop_ip1_y(double* pv1, double* v1_xyz,
                             double* pcx, double* pcy, double* pcz,
                             double ai, double aj,
                             int lx_i, int ly_i, int lz_i,
                             int lx_j, int ly_j, int lz_j, int l1, int l1l1)
{
    VRHO_LOOP_IP1(y,x,z);
}


/* z axis in the X slot. */
static void _vrho_loop_ip1_z(double* pv1, double* v1_xyz,
                             double* pcx, double* pcy, double* pcz,
                             double ai, double aj,
                             int lx_i, int ly_i, int lz_i,
                             int lx_j, int ly_j, int lz_j, int l1, int l1l1)
{
    VRHO_LOOP_IP1(z,x,y);
}
l##X = l##X##_i + j##X; \ + pv1[0] += c##X * cfac * v1_xyz[lx*l1l1+ly*l1+lz]; \ + } \ + for (j##X = 0; j##X <= l##X##_j; j##X++) { \ + if (l##X##_i > 0) { \ + c##X = pc##X[j##X+_LEN_CART0[l##X##_j]] * l##X##_i; \ + l##X = l##X##_i_m1 + j##X; \ + pv1[0] += c##X * cfac * v1_xyz[lx*l1l1+ly*l1+lz]; \ + } \ + c##X = pc##X[j##X+_LEN_CART0[l##X##_j]] * fac_i; \ + l##X = l##X##_i_p1 + j##X; \ + pv1[0] += c##X * cfac * v1_xyz[lx*l1l1+ly*l1+lz]; \ + } \ + } \ + } + + +static void _vsigma_loop_x(double* pv1, double* v1_xyz, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + VSIGMA_LOOP(x,y,z); +} + + +static void _vsigma_loop_y(double* pv1, double* v1_xyz, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + VSIGMA_LOOP(y,x,z); +} + + +static void _vsigma_loop_z(double* pv1, double* v1_xyz, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + VSIGMA_LOOP(z,x,y); +} + + +static void _v1_xyz_to_v1(void (*_v1_loop)(), double* v1_xyz, double* v1, + int li, int lj, double ai, double aj, + double* ri, double* rj, double* cache) +{ + int lx_i, ly_i, lz_i; + int lx_j, ly_j, lz_j; + double rij[3]; + + rij[0] = ri[0] - rj[0]; + rij[1] = ri[1] - rj[1]; + rij[2] = ri[2] - rj[2]; + + int l1 = li + lj + 2; + int l1l1 = l1 * l1; + double *coeff = cache; + int dj = _LEN_CART[lj+1]; + cache += 3 * dj; + + _get_dm_to_dm_xyz_coeff(coeff, rij, lj+1, cache); + + double *pcx = coeff; + double *pcy = pcx + dj; + double *pcz = pcy + dj; + double *pv1 = v1; + for (lx_i = li; lx_i >= 0; lx_i--) { + for (ly_i = li-lx_i; ly_i >= 0; ly_i--) { + lz_i = li - lx_i - ly_i; + for (lx_j = lj; lx_j >= 0; lx_j--) { + for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) { + lz_j = lj - lx_j - ly_j; + _v1_loop(pv1, v1_xyz, pcx, pcy, 
pcz, ai, aj, + lx_i, ly_i, lz_i, lx_j, ly_j, lz_j, l1, l1l1); + pv1 += 1; + } + } + } + } +} + +/* +#define SUM_NABLA_I \ + if (lx_i > 0) { \ + pv1[0] += lx_i * cxyzj * v1x[(lx-1)*l1l1+ly*l1+lz]; \ + } \ + pv1[0] += fac_i * cxyzj * v1x[(lx+1)*l1l1+ly*l1+lz]; \ + if (ly_i > 0) { \ + pv1[0] += ly_i * cxyzj * v1y[lx*l1l1+(ly-1)*l1+lz]; \ + } \ + pv1[0] += fac_i * cxyzj * v1y[lx*l1l1+(ly+1)*l1+lz]; \ + if (lz_i > 0) { \ + pv1[0] += lz_i * cxyzj * v1z[lx*l1l1+ly*l1+lz-1]; \ + } \ + pv1[0] += fac_i * cxyzj * v1z[lx*l1l1+ly*l1+lz+1]; +*/ +/* +static void _vsigma_loop_ip1ip2_x(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + int lx_j_m1 = lx_j - 1; + int lx_j_p1 = lx_j + 1; + double cxj, cyj, czj, cyzj, cxyzj; + double fac_i = -2.0 * ai; + double fac_j = -2.0 * aj; + + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + for (jz = 0; jz <= lz_j; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + cyzj = cyj * czj; + for (jx = 0; jx <= lx_j_m1; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j_m1]] * lx_j; + cxyzj = cxj * cyzj; + lx = lx_i + jx; + SUM_NABLA_I; + } + for (jx = 0; jx <= lx_j_p1; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j_p1]] * fac_j; + cxyzj = cxj * cyzj; + lx = lx_i + jx; + SUM_NABLA_I; + } + } + } +} +*/ + +#define COMMON_INIT(x) \ + int l##x##_i; \ + int lx, ly, lz; \ + int jx, jy, jz; \ + int lx_j_m1 = lx_j - 1; \ + int lx_j_p1 = lx_j + 1; \ + int ly_j_m1 = ly_j - 1; \ + int ly_j_p1 = ly_j + 1; \ + int lz_j_m1 = lz_j - 1; \ + int lz_j_p1 = lz_j + 1; \ + double ci; \ + double cxj, cyj, czj; \ + double cyzj, cxzj, cxyj, cxyzj; \ + double fac_i = -2.0 * ai; \ + double fac_j = -2.0 * aj; \ + + +#define SUM_NABLA_J(x, y, z) \ + for (j##y = 0; j##y <= l##y##_j; j##y++) { \ + c##y##j = pc##y[j##y+_LEN_CART0[l##y##_j]]; \ + l##y = l##y##_i + 
/*
 * Kernel for _vsigma_ip1ip2 with x as the shifted axis on shell i; adds its
 * result to pv1[0].
 *
 * COMMON_INIT(x) declares the working variables, including a local lx_i and
 * fac_i = -2*ai, fac_j = -2*aj.  The caller's x power arrives as lx_i0.
 * Two passes are made, each running SUM_NABLA_J once per table
 * (v1x, v1y, v1z):
 *   - lx_i = lx_i0 + 1 with prefactor ci = -2*ai;
 *   - lx_i = lx_i0 - 1 with prefactor ci = lx_i0 (only when lx_i0 > 0).
 */
static void _vsigma_loop_ip1ip2_x(double* pv1, double* v1x, double* v1y, double* v1z,
                                  double* pcx, double* pcy, double* pcz,
                                  double ai, double aj,
                                  int lx_i0, int ly_i, int lz_i,
                                  int lx_j, int ly_j, int lz_j, int l1, int l1l1)
{
    COMMON_INIT(x);

    /* Raised power on x, weighted by -2*ai. */
    lx_i = lx_i0 + 1;
    ci = fac_i;
    SUM_NABLA_J(x,y,z);
    SUM_NABLA_J(y,x,z);
    SUM_NABLA_J(z,x,y);

    /* Lowered power on x, weighted by lx_i0; absent when lx_i0 == 0. */
    if (lx_i0 > 0) {
        lx_i = lx_i0 - 1;
        ci = lx_i0;
        SUM_NABLA_J(x,y,z);
        SUM_NABLA_J(y,x,z);
        SUM_NABLA_J(z,x,y);
    }
}
= ly_i + jy; + SUM_NABLA_I; + } + } + } +} +*/ + +static void _vsigma_loop_ip1ip2_y(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i0, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + COMMON_INIT(y); + + ly_i = ly_i0 + 1; + ci = fac_i; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + + if (ly_i0 > 0) { + ly_i = ly_i0 - 1; + ci = ly_i0; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + } +} + + +/* +static void _vsigma_loop_ip1ip2_z(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + int lz_j_m1 = lz_j - 1; + int lz_j_p1 = lz_j + 1; + double cxj, cyj, czj, cxyj, cxyzj; + double fac_i = -2.0 * ai; + double fac_j = -2.0 * aj; + + for (jx = 0; jx <= lx_j; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + cxyj = cxj * cyj; + for (jz = 0; jz <= lz_j_m1; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j_m1]] * lz_j; + cxyzj = czj * cxyj; + lz = lz_i + jz; + SUM_NABLA_I; + } + for (jz = 0; jz <= lz_j_p1; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j_p1]] * fac_j; + cxyzj = czj * cxyj; + lz = lz_i + jz; + SUM_NABLA_I; + } + } + } +} +*/ + +static void _vsigma_loop_ip1ip2_z(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i0, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + COMMON_INIT(z); + + lz_i = lz_i0 + 1; + ci = fac_i; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + + if (lz_i0 > 0) { + lz_i = lz_i0 - 1; + ci = lz_i0; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + } +} + + +static void _vsigma_ip1ip2(void (*_v1_loop)(), 
double* v1x, + double* v1y, double* v1z, double* v1, + int li, int lj, double ai, double aj, + double* ri, double* rj, double* cache) +{ + int lx_i, ly_i, lz_i; + int lx_j, ly_j, lz_j; + double rij[3]; + + rij[0] = ri[0] - rj[0]; + rij[1] = ri[1] - rj[1]; + rij[2] = ri[2] - rj[2]; + + int topl = li + lj + 2; + int l1 = topl + 1; + int l1l1 = l1 * l1; + double *coeff = cache; + int dj = _LEN_CART[lj+1]; + cache += 3 * dj; + + _get_dm_to_dm_xyz_coeff(coeff, rij, lj+1, cache); + + double *pcx = coeff; + double *pcy = pcx + dj; + double *pcz = pcy + dj; + double *pv1 = v1; + for (lx_i = li; lx_i >= 0; lx_i--) { + for (ly_i = li-lx_i; ly_i >= 0; ly_i--) { + lz_i = li - lx_i - ly_i; + for (lx_j = lj; lx_j >= 0; lx_j--) { + for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) { + lz_j = lj - lx_j - ly_j; + _v1_loop(pv1, v1x, v1y, v1z, pcx, pcy, pcz, ai, aj, + lx_i, ly_i, lz_i, lx_j, ly_j, lz_j, l1, l1l1); + pv1 += 1; + } + } + } + } +} + + +static void _vsigma_loop_lap1_x(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + double cxj, cyj, czj, cxyj, cxyzj; + double fac_x; + double fac_i = -2.0 * ai; + + for (jx = 0; jx <= lx_j; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + cxyj = cxj * cyj; + for (jz = 0; jz <= lz_j; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + cxyzj = cxyj * czj; + + fac_x = lx_i + 1; + pv1[0] += fac_x * fac_i * cxyzj * v1x[lx*l1l1+ly*l1+lz]; + if (lx_i - 1 > 0) { + fac_x = lx_i - 1; + pv1[0] += fac_x * lx_i * cxyzj * v1x[(lx-2)*l1l1+ly*l1+lz]; + } + + if (lx_i > 0) { + fac_x = lx_i; + if (ly_i > 0) { + pv1[0] += fac_x * ly_i * cxyzj * v1y[(lx-1)*l1l1+(ly-1)*l1+lz]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1y[(lx-1)*l1l1+(ly+1)*l1+lz]; + + if (lz_i > 0) { + pv1[0] 
+= fac_x * lz_i * cxyzj * v1z[(lx-1)*l1l1+ly*l1+lz-1]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1z[(lx-1)*l1l1+ly*l1+lz+1]; + } + + fac_x = fac_i; + if (lx_i > 0) { + pv1[0] += fac_x * lx_i * cxyzj * v1x[lx*l1l1+ly*l1+lz]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1x[(lx+2)*l1l1+ly*l1+lz]; + + if (ly_i > 0) { + pv1[0] += fac_x * ly_i * cxyzj * v1y[(lx+1)*l1l1+(ly-1)*l1+lz]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1y[(lx+1)*l1l1+(ly+1)*l1+lz]; + + if (lz_i > 0) { + pv1[0] += fac_x * lz_i * cxyzj * v1z[(lx+1)*l1l1+ly*l1+lz-1]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1z[(lx+1)*l1l1+ly*l1+lz+1]; + } + } + } +} + + +static void _vsigma_loop_lap1_y(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + double cxj, cyj, czj, cxyj, cxyzj; + double fac_y; + double fac_i = -2.0 * ai; + + for (jx = 0; jx <= lx_j; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + cxyj = cxj * cyj; + for (jz = 0; jz <= lz_j; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + cxyzj = cxyj * czj; + + fac_y = ly_i + 1; + pv1[0] += fac_y * fac_i * cxyzj * v1y[lx*l1l1+ly*l1+lz]; + if (ly_i - 1 > 0) { + fac_y = ly_i - 1; + pv1[0] += fac_y * ly_i * cxyzj * v1y[lx*l1l1+(ly-2)*l1+lz]; + } + + if (ly_i > 0) { + fac_y = ly_i; + if (lx_i > 0) { + pv1[0] += fac_y * lx_i * cxyzj * v1x[(lx-1)*l1l1+(ly-1)*l1+lz]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1x[(lx+1)*l1l1+(ly-1)*l1+lz]; + + if (lz_i > 0) { + pv1[0] += fac_y * lz_i * cxyzj * v1z[lx*l1l1+(ly-1)*l1+lz-1]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1z[lx*l1l1+(ly-1)*l1+lz+1]; + } + + fac_y = fac_i; + if (lx_i > 0) { + pv1[0] += fac_y * lx_i * cxyzj * v1x[(lx-1)*l1l1+(ly+1)*l1+lz]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1x[(lx+1)*l1l1+(ly+1)*l1+lz]; + + if (ly_i 
> 0) { + pv1[0] += fac_y * ly_i * cxyzj * v1y[lx*l1l1+ly*l1+lz]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1y[lx*l1l1+(ly+2)*l1+lz]; + + if (lz_i > 0) { + pv1[0] += fac_y * lz_i * cxyzj * v1z[lx*l1l1+(ly+1)*l1+lz-1]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1z[lx*l1l1+(ly+1)*l1+lz+1]; + } + } + } +} + + +static void _vsigma_loop_lap1_z(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + double cxj, cyj, czj, cxyj, cxyzj; + double fac_z; + double fac_i = -2.0 * ai; + + for (jx = 0; jx <= lx_j; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + cxyj = cxj * cyj; + for (jz = 0; jz <= lz_j; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + cxyzj = cxyj * czj; + + fac_z = lz_i + 1; + pv1[0] += fac_z * fac_i * cxyzj * v1z[lx*l1l1+ly*l1+lz]; + if (lz_i - 1 > 0) { + fac_z = lz_i - 1; + pv1[0] += fac_z * lz_i * cxyzj * v1z[lx*l1l1+ly*l1+lz-2]; + } + + if (lz_i > 0) { + fac_z = lz_i; + if (lx_i > 0) { + pv1[0] += fac_z * lx_i * cxyzj * v1x[(lx-1)*l1l1+ly*l1+lz-1]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1x[(lx+1)*l1l1+ly*l1+lz-1]; + + if (ly_i > 0) { + pv1[0] += fac_z * ly_i * cxyzj * v1y[lx*l1l1+(ly-1)*l1+lz-1]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1y[lx*l1l1+(ly+1)*l1+lz-1]; + } + + fac_z = fac_i; + if (lx_i > 0) { + pv1[0] += fac_z * lx_i * cxyzj * v1x[(lx-1)*l1l1+ly*l1+lz+1]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1x[(lx+1)*l1l1+ly*l1+lz+1]; + + if (ly_i > 0) { + pv1[0] += fac_z * ly_i * cxyzj * v1y[lx*l1l1+(ly-1)*l1+lz+1]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1y[lx*l1l1+(ly+1)*l1+lz+1]; + + if (lz_i > 0) { + pv1[0] += fac_z * lz_i * cxyzj * v1z[lx*l1l1+ly*l1+lz]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1z[lx*l1l1+ly*l1+lz+2]; + } + } + } +} + + +static void 
_vsigma_lap1(void (*_v1_loop)(), double* v1x, + double* v1y, double* v1z, double* v1, + int li, int lj, double ai, double aj, + double* ri, double* rj, double* cache) +{ + int lx_i, ly_i, lz_i; + int lx_j, ly_j, lz_j; + double rij[3]; + + rij[0] = ri[0] - rj[0]; + rij[1] = ri[1] - rj[1]; + rij[2] = ri[2] - rj[2]; + + int topl = li + lj + 2; + int l1 = topl + 1; + int l1l1 = l1 * l1; + double *coeff = cache; + int dj = _LEN_CART[lj]; + cache += 3 * dj; + + _get_dm_to_dm_xyz_coeff(coeff, rij, lj, cache); + + double *pcx = coeff; + double *pcy = pcx + dj; + double *pcz = pcy + dj; + double *pv1 = v1; + for (lx_i = li; lx_i >= 0; lx_i--) { + for (ly_i = li-lx_i; ly_i >= 0; ly_i--) { + lz_i = li - lx_i - ly_i; + for (lx_j = lj; lx_j >= 0; lx_j--) { + for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) { + lz_j = lj - lx_j - ly_j; + _v1_loop(pv1, v1x, v1y, v1z, pcx, pcy, pcz, ai, aj, + lx_i, ly_i, lz_i, lx_j, ly_j, lz_j, l1, l1l1); + pv1 += 1; + } + } + } + } +} + + +int eval_mat_gga_orth(double *weights, double *out, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int topl = li + lj + 1; + int l1 = topl+1; + int l1l1l1 = l1 * l1 * l1; + double *mat_xyz = cache; + cache += l1l1l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + if (data_size == 0) { + return 0; + } + cache += data_size; + + size_t ngrids = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + double *vx = weights + ngrids; + double *vy = vx + ngrids; + double *vz = vy + ngrids; + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, weights, li+lj, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _dm_xyz_to_dm(mat_xyz, out, li, lj, ri, rj, cache); + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, vx, topl, fac, 
xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vsigma_loop_x, mat_xyz, out, li, lj, ai, aj, ri, rj, cache); + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, vy, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vsigma_loop_y, mat_xyz, out, li, lj, ai, aj, ri, rj, cache); + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, vz, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vsigma_loop_z, mat_xyz, out, li, lj, ai, aj, ri, rj, cache); + + return 1; +} + + +int eval_mat_lda_orth(double *weights, double *out, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int topl = li + lj; + int l1 = topl+1; + int l1l1l1 = l1*l1*l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + + if (data_size == 0) { + return 0; + } + cache += data_size; + + double *dm_xyz = cache; + cache += l1l1l1; + + memset(dm_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(dm_xyz, weights, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + + _dm_xyz_to_dm(dm_xyz, out, li, lj, ri, rj, cache); + return 1; +} + + +int eval_mat_lda_orth_ip1(double *weights, double *out, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int dij = _LEN_CART[li] * _LEN_CART[lj]; + int topl = li + lj + 1; + int l1 = topl+1; + int l1l1l1 = l1*l1*l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + if (data_size == 0) { + return 0; + } + cache += data_size; + + double 
*mat_xyz = cache; + cache += l1l1l1; + double *pout_x = out; + double *pout_y = pout_x + dij; + double *pout_z = pout_y + dij; + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, weights, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_x, mat_xyz, pout_x, li, lj, ai, aj, ri, rj, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_y, mat_xyz, pout_y, li, lj, ai, aj, ri, rj, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_z, mat_xyz, pout_z, li, lj, ai, aj, ri, rj, cache); + return 1; +} + + +int eval_mat_gga_orth_ip1(double *weights, double *out, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int dij = _LEN_CART[li] * _LEN_CART[lj]; + int topl = li + lj + 2; + int l1 = topl+1; + int l1l1l1 = l1*l1*l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + if (data_size == 0) { + return 0; + } + cache += data_size; + + double *mat_xyz = cache; + double *mat_x = mat_xyz; + double *mat_y = mat_x + l1l1l1; + double *mat_z = mat_y + l1l1l1; + cache += l1l1l1*3; + double *pout_x = out; + double *pout_y = pout_x + dij; + double *pout_z = pout_y + dij; + + size_t ngrids = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + double *vx = weights + ngrids; + double *vy = vx + ngrids; + double *vz = vy + ngrids; + + //vrho part + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, weights, topl-1, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_x, mat_xyz, pout_x, li, lj, ai, aj, ri, rj, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_y, mat_xyz, pout_y, li, lj, ai, aj, ri, rj, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_z, mat_xyz, pout_z, li, lj, ai, aj, ri, rj, cache); + + //vsigma part + memset(mat_x, 0, l1l1l1*sizeof(double)); + 
_orth_ints(mat_x, vx, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + + memset(mat_y, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_y, vy, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + + memset(mat_z, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_z, vz, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + + _vsigma_ip1ip2(_vsigma_loop_ip1ip2_x, mat_x, mat_y, mat_z, + pout_x, li, lj, ai, aj, ri, rj, cache); + _vsigma_ip1ip2(_vsigma_loop_ip1ip2_y, mat_x, mat_y, mat_z, + pout_y, li, lj, ai, aj, ri, rj, cache); + _vsigma_ip1ip2(_vsigma_loop_ip1ip2_z, mat_x, mat_y, mat_z, + pout_z, li, lj, ai, aj, ri, rj, cache); + + _vsigma_lap1(_vsigma_loop_lap1_x, mat_x, mat_y, mat_z, + pout_x, li, lj, ai, aj, ri, rj, cache); + _vsigma_lap1(_vsigma_loop_lap1_y, mat_x, mat_y, mat_z, + pout_y, li, lj, ai, aj, ri, rj, cache); + _vsigma_lap1(_vsigma_loop_lap1_z, mat_x, mat_y, mat_z, + pout_z, li, lj, ai, aj, ri, rj, cache); + return 1; +} + + +void _apply_ints(int (*eval_ints)(), double *weights, double *mat, + PGFPair* pgfpair, int comp, double fac, int dimension, + double* dh, double *a, double *b, int *mesh, + double* ish_gto_norm, double* jsh_gto_norm, + int *ish_atm, int *ish_bas, double *ish_env, + int *jsh_atm, int *jsh_bas, double *jsh_env, + double* Ls, double *cache) +{ + int i_sh = pgfpair->ish; + int j_sh = pgfpair->jsh; + int ipgf = pgfpair->ipgf; + int jpgf = pgfpair->jpgf; + int iL = pgfpair->iL; + double cutoff = pgfpair->radius; + + int li = ish_bas[ANG_OF+i_sh*BAS_SLOTS]; + int lj = jsh_bas[ANG_OF+j_sh*BAS_SLOTS]; + int di = _LEN_CART[li]; + int dj = _LEN_CART[lj]; + + int ish_nprim = ish_bas[NPRIM_OF+i_sh*BAS_SLOTS]; + int jsh_nprim = jsh_bas[NPRIM_OF+j_sh*BAS_SLOTS]; + int naoi = ish_nprim * di; + int naoj = jsh_nprim * dj; + + double *ri = ish_env + ish_atm[PTR_COORD+ish_bas[ATOM_OF+i_sh*BAS_SLOTS]*ATM_SLOTS]; + double *rj = jsh_env + jsh_atm[PTR_COORD+jsh_bas[ATOM_OF+j_sh*BAS_SLOTS]*ATM_SLOTS]; + double *rL = Ls + iL*3; + 
double rjL[3]; + rjL[0] = rj[0] + rL[0]; + rjL[1] = rj[1] + rL[1]; + rjL[2] = rj[2] + rL[2]; + + double ai = ish_env[ish_bas[PTR_EXP+i_sh*BAS_SLOTS]+ipgf]; + double aj = jsh_env[jsh_bas[PTR_EXP+j_sh*BAS_SLOTS]+jpgf]; + double ci = ish_gto_norm[ipgf]; + double cj = jsh_gto_norm[jpgf]; + double aij = ai + aj; + double rrij = CINTsquare_dist(ri, rjL); + double eij = (ai * aj / aij) * rrij; + if (eij > EIJCUTOFF) { + return; + } + fac *= exp(-eij) * ci * cj * CINTcommon_fac_sp(li) * CINTcommon_fac_sp(lj); + if (fac < ish_env[PTR_EXPDROP] && fac < jsh_env[PTR_EXPDROP]) { + return; + } + + double *out = cache; + memset(out, 0, comp*di*dj*sizeof(double)); + cache += comp * di * dj; + + int value = (*eval_ints)(weights, out, comp, li, lj, ai, aj, ri, rjL, + fac, cutoff, dimension, dh, a, b, mesh, cache); + + double *pmat = mat + ipgf*di*naoj + jpgf*dj; + if (value != 0) { + int i, j, ic; + for (ic = 0; ic < comp; ic++) { + for (i = 0; i < di; i++) { + #pragma omp simd + for (j = 0; j < dj; j++) { + pmat[i*naoj+j] += out[i*dj+j]; + } + } + pmat += naoi * naoj; + out += di * dj; + } + } +} + + +static size_t _ints_cache_size(int l, int nprim, int nctr, int* mesh, double radius, double* dh, int comp) +{ + size_t size = 0; + size_t nmx = get_max_num_grid_orth(dh, radius); + int max_mesh = MAX(MAX(mesh[0], mesh[1]), mesh[2]); + int l1 = 2 * l + 1; + if (comp == 3) { + l1 += 1; + } + int l1l1 = l1 * l1; + int ncart = _LEN_CART[l1]; // use l1 to be safe + + size += comp * nprim * nprim * ncart * ncart; // dm_cart + size += comp * ncart * ncart; // out + size += l1 * (mesh[0] + mesh[1] + mesh[2]); // xs_exp, ys_exp, zs_exp + + size_t size_orth_components = l1 * nmx + nmx; // orth_components + size += l1l1 * l1; // dm_xyz + size += 3 * (ncart + l1); // _dm_xyz_to_dm + + size_t size_orth_ints = 0; + if (nmx < max_mesh) { + size_orth_ints = (l1 + l1l1) * nmx; + } else { + size_orth_ints = l1*mesh[2] + l1l1*mesh[0]; + } + size += MAX(size_orth_components, size_orth_ints); + size += 
nctr * ncart * nprim * ncart; + //size += 1000000; + //printf("Memory allocated per thread for make_mat: %ld MB.\n", size*sizeof(double) / 1000000); + return size; +} + + +static size_t _ints_core_cache_size(int* mesh, double radius, double* dh, int comp) +{ + size_t size = 0; + size_t nmx = get_max_num_grid_orth(dh, radius); + int max_mesh = MAX(MAX(mesh[0], mesh[1]), mesh[2]); + const int l = 0; + int l1 = l + 1; + if (comp == 3) { + l1 += 1; + } + int l1l1 = l1 * l1; + int ncart = _LEN_CART[l1]; + + size_t size_orth_components = l1 * nmx + nmx; + size_t size_orth_ints = 0; + if (nmx < max_mesh) { + size_orth_ints = (l1 + l1l1) * nmx; + } else { + size_orth_ints = l1*mesh[2] + l1l1*mesh[0]; + } + size += MAX(size_orth_components, size_orth_ints); + size += l1 * (mesh[0] + mesh[1] + mesh[2]); + size += l1l1 * l1; + size += 3 * (ncart + l1); + //size += 1000000; + return size; +} + + +void grid_integrate_drv(int (*eval_ints)(), double* mat, double* weights, TaskList** task_list, + int comp, int hermi, int grid_level, + int *shls_slice, int* ish_ao_loc, int* jsh_ao_loc, + int dimension, double* Ls, double* a, double* b, + int* ish_atm, int* ish_bas, double* ish_env, + int* jsh_atm, int* jsh_bas, double* jsh_env, int cart) +{ + TaskList* tl = *task_list; + GridLevel_Info* gridlevel_info = tl->gridlevel_info; + Task *task = (tl->tasks)[grid_level]; + int ntasks = task->ntasks; + if (ntasks <= 0) { + return; + } + double max_radius = task->radius; + PGFPair **pgfpairs = task->pgfpairs; + int* mesh = gridlevel_info->mesh + grid_level*3; + + double dh[9]; + get_grid_spacing(dh, a, mesh); + + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + //const int nijsh = nish * njsh; + const int naoi = ish_ao_loc[ish1] - ish_ao_loc[ish0]; + const int naoj = jsh_ao_loc[jsh1] - jsh_ao_loc[jsh0]; + + int ish_lmax = get_lmax(ish0, ish1, 
ish_bas); + int jsh_lmax = ish_lmax; + if (hermi != 1) { + jsh_lmax = get_lmax(jsh0, jsh1, jsh_bas); + } + + int ish_nprim_max = get_nprim_max(ish0, ish1, ish_bas); + int jsh_nprim_max = ish_nprim_max; + if (hermi != 1) { + jsh_nprim_max = get_nprim_max(jsh0, jsh1, jsh_bas); + } + + int ish_nctr_max = get_nctr_max(ish0, ish1, ish_bas); + int jsh_nctr_max = ish_nctr_max; + if (hermi != 1) { + jsh_nctr_max = get_nctr_max(jsh0, jsh1, jsh_bas); + } + + double **gto_norm_i = (double**) malloc(sizeof(double*) * nish); + double **cart2sph_coeff_i = (double**) malloc(sizeof(double*) * nish); + get_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1, ish_bas, ish_env, cart); + double **gto_norm_j = gto_norm_i; + double **cart2sph_coeff_j = cart2sph_coeff_i; + if (hermi != 1) { + gto_norm_j = (double**) malloc(sizeof(double*) * njsh); + cart2sph_coeff_j = (double**) malloc(sizeof(double*) * njsh); + get_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1, jsh_bas, jsh_env, cart); + } + + int *task_loc; + int nblock = get_task_loc(&task_loc, pgfpairs, ntasks, ish0, ish1, jsh0, jsh1, hermi); + + size_t cache_size = _ints_cache_size(MAX(ish_lmax,jsh_lmax), + MAX(ish_nprim_max, jsh_nprim_max), + MAX(ish_nctr_max, jsh_nctr_max), + mesh, max_radius, dh, comp); + +#pragma omp parallel +{ + int ish, jsh, itask, iblock; + int li, lj, ish_nprim, jsh_nprim; + PGFPair *pgfpair = NULL; + double *ptr_gto_norm_i, *ptr_gto_norm_j; + double *cache0 = malloc(sizeof(double) * cache_size); + double *dm_cart = cache0; + int len_dm_cart = comp*ish_nprim_max*_LEN_CART[ish_lmax]*jsh_nprim_max*_LEN_CART[jsh_lmax]; + double *cache = dm_cart + len_dm_cart; + + #pragma omp for schedule(dynamic) + for (iblock = 0; iblock < nblock; iblock+=2) { + itask = task_loc[iblock]; + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + jsh = pgfpair->jsh; + ptr_gto_norm_i = gto_norm_i[ish]; + ptr_gto_norm_j = gto_norm_j[jsh]; + li = ish_bas[ANG_OF+ish*BAS_SLOTS]; + lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + 
ish_nprim = ish_bas[NPRIM_OF+ish*BAS_SLOTS]; + jsh_nprim = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS]; + len_dm_cart = comp*ish_nprim*_LEN_CART[li]*jsh_nprim*_LEN_CART[lj]; + memset(dm_cart, 0, len_dm_cart * sizeof(double)); + for (; itask < task_loc[iblock+1]; itask++) { + pgfpair = pgfpairs[itask]; + _apply_ints(eval_ints, weights, dm_cart, pgfpair, comp, 1.0, dimension, dh, a, b, mesh, + ptr_gto_norm_i, ptr_gto_norm_j, ish_atm, ish_bas, ish_env, + jsh_atm, jsh_bas, jsh_env, Ls, cache); + } + transform_dm_inverse(dm_cart, mat, comp, + cart2sph_coeff_i[ish], cart2sph_coeff_j[jsh], + ish_ao_loc, jsh_ao_loc, ish_bas, jsh_bas, + ish, jsh, ish0, jsh0, naoi, naoj, cache); + if (hermi == 1 && ish != jsh) { + fill_tril(mat, comp, ish_ao_loc, jsh_ao_loc, + ish, jsh, ish0, jsh0, naoi, naoj); + } + } + free(cache0); +} + + if (task_loc) { + free(task_loc); + } + del_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1); + if (hermi != 1) { + del_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1); + } +} + + +void int_gauss_charge_v_rs(int (*eval_ints)(), double* out, double* v_rs, int comp, + int* atm, int* bas, int nbas, double* env, + int* mesh, int dimension, double* a, double* b, double max_radius) +{ + double dh[9]; + get_grid_spacing(dh, a, mesh); + + size_t cache_size = _ints_core_cache_size(mesh, max_radius, dh, comp); + +#pragma omp parallel +{ + int ia, ib; + double alpha, coeff, charge, rad, fac; + double *r0; + double *cache = (double*) malloc(sizeof(double) * cache_size); + #pragma omp for schedule(static) + for (ib = 0; ib < nbas; ib++) { + ia = bas[ib*BAS_SLOTS+ATOM_OF]; + alpha = env[bas[ib*BAS_SLOTS+PTR_EXP]]; + coeff = env[bas[ib*BAS_SLOTS+PTR_COEFF]]; + charge = (double)atm[ia*ATM_SLOTS+CHARGE_OF]; + r0 = env + atm[ia*ATM_SLOTS+PTR_COORD]; + fac = -charge * coeff; + rad = env[atm[ia*ATM_SLOTS+PTR_RADIUS]]; + (*eval_ints)(v_rs, out+ia*comp, comp, 0, 0, alpha, 0.0, r0, r0, + fac, rad, dimension, dh, a, b, mesh, cache); + } + free(cache); +} +} diff --git 
a/pyscf/lib/dft/libxc_itrf.c b/pyscf/lib/dft/libxc_itrf.c index 76d7497980..badeab597a 100644 --- a/pyscf/lib/dft/libxc_itrf.c +++ b/pyscf/lib/dft/libxc_itrf.c @@ -15,6 +15,7 @@ * * Authors: Qiming Sun * Susi Lehtola + * Xing Zhang * * libxc from * http://www.tddft.org/programs/octopus/wiki/index.php/Libxc:manual @@ -24,7 +25,10 @@ #include #include #include +#include "config.h" #define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +#define MAX_THREADS 256 // TODO: register python signal #define raise_error return @@ -83,13 +87,13 @@ * In spin restricted case (spin == 1), rho_u is assumed to be the * spin-free quantities, rho_d is not used. */ -static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) +static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np, int ld_rho_u) { int i; double *sigma, *tau; double *gxu, *gyu, *gzu, *gxd, *gyd, *gzd; double *tau_u, *tau_d; - double *rho_d = rho_u + np * nvar; + double *rho_d = rho_u + ld_rho_u * nvar; switch (nvar) { case LDA_NVAR: @@ -107,12 +111,12 @@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) case GGA_NVAR: if (spin == 1) { sigma = rho + np * 2; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - gxd = rho_d + np; - gyd = rho_d + np * 2; - gzd = rho_d + np * 3; + gxu = rho_u + ld_rho_u; + gyu = rho_u + ld_rho_u * 2; + gzu = rho_u + ld_rho_u * 3; + gxd = rho_d + ld_rho_u; + gyd = rho_d + ld_rho_u * 2; + gzd = rho_d + ld_rho_u * 3; for (i = 0; i < np; i++) { rho[i*2+0] = rho_u[i]; rho[i*2+1] = rho_d[i]; @@ -122,9 +126,9 @@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) } } else { sigma = rho + np; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; + gxu = rho_u + ld_rho_u; + gyu = rho_u + ld_rho_u * 2; + gzu = rho_u + ld_rho_u * 3; for (i = 0; i < np; i++) { rho[i] = rho_u[i]; sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; @@ -135,14 +139,14 
@@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) if (spin == 1) { sigma = rho + np * 2; tau = sigma + np * 3; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - gxd = rho_d + np; - gyd = rho_d + np * 2; - gzd = rho_d + np * 3; - tau_u = rho_u + np * 4; - tau_d = rho_d + np * 4; + gxu = rho_u + ld_rho_u; + gyu = rho_u + ld_rho_u * 2; + gzu = rho_u + ld_rho_u * 3; + gxd = rho_d + ld_rho_u; + gyd = rho_d + ld_rho_u * 2; + gzd = rho_d + ld_rho_u * 3; + tau_u = rho_u + ld_rho_u * 4; + tau_d = rho_d + ld_rho_u * 4; for (i = 0; i < np; i++) { rho[i*2+0] = rho_u[i]; rho[i*2+1] = rho_d[i]; @@ -157,10 +161,10 @@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) } else { sigma = rho + np; tau = sigma + np; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - tau_u = rho_u + np * 4; + gxu = rho_u + ld_rho_u; + gyu = rho_u + ld_rho_u * 2; + gzu = rho_u + ld_rho_u * 3; + tau_u = rho_u + ld_rho_u * 4; for (i = 0; i < np; i++) { rho[i] = rho_u[i]; sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; @@ -171,7 +175,7 @@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) } } static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, - double *rho, double *exc) + double *rho, double *exc, int offset, int blksize) { double *sigma, *tau; double *lapl = rho; @@ -266,6 +270,21 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, if (deriv > 3) { v4rho4 = v3rho3 + np * 4; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset * 2; + } + if (deriv > 1) { + v2rho2 += offset * 3; + } + if (deriv > 2) { + v3rho3 += offset * 4; + } + if (deriv > 3) { + v4rho4 += offset * 5; + } } else { if (deriv > 0) { vrho = exc + np; @@ -279,15 +298,30 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, if (deriv > 3) { v4rho4 = v3rho3 + np; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += 
offset; + } + if (deriv > 1) { + v2rho2 += offset; + } + if (deriv > 2) { + v3rho3 += offset; + } + if (deriv > 3) { + v4rho4 += offset; + } } - xc_lda(func_x, np, rho, exc, vrho, v2rho2, v3rho3, v4rho4); + xc_lda(func_x, blksize, rho, exc, vrho, v2rho2, v3rho3, v4rho4); break; case XC_FAMILY_GGA: #ifdef XC_FAMILY_HYB_GGA case XC_FAMILY_HYB_GGA: #endif if (spin == 1) { - sigma = rho + np * 2; + sigma = rho + blksize * 2; if (deriv > 0) { vrho = exc + np; vsigma = vrho + np * 2; @@ -310,8 +344,33 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, v4rhosigma3 = v4rho2sigma2 + np * 3*6 ; v4sigma4 = v4rhosigma3 + np * 2*10; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset * 2; + vsigma += offset * 3; + } + if (deriv > 1) { + v2rho2 += offset * 3; + v2rhosigma += offset * 6; + v2sigma2 += offset * 6; + } + if (deriv > 2) { + v3rho3 += offset * 4; + v3rho2sigma += offset * 9; + v3rhosigma2 += offset * 12; + v3sigma3 += offset * 10; + } + if (deriv > 3) { + v4rho4 += offset * 5; + v4rho3sigma += offset * 4*3; + v4rho2sigma2 += offset * 3*6; + v4rhosigma3 += offset * 2*10; + v4sigma4 += offset * 15; + } } else { - sigma = rho + np; + sigma = rho + blksize; if (deriv > 0) { vrho = exc + np; vsigma = vrho + np; @@ -334,8 +393,33 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, v4rhosigma3 = v4rho2sigma2 + np; v4sigma4 = v4rhosigma3 + np; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset; + vsigma += offset; + } + if (deriv > 1) { + v2rho2 += offset; + v2rhosigma += offset; + v2sigma2 += offset; + } + if (deriv > 2) { + v3rho3 += offset; + v3rho2sigma += offset; + v3rhosigma2 += offset; + v3sigma3 += offset; + } + if (deriv > 3) { + v4rho4 += offset; + v4rho3sigma += offset; + v4rho2sigma2 += offset; + v4rhosigma3 += offset; + v4sigma4 += offset; + } } - xc_gga(func_x, np, rho, sigma, + xc_gga(func_x, blksize, rho, sigma, exc, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, v3rho3, 
v3rho2sigma, v3rhosigma2, v3sigma3, @@ -346,8 +430,8 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, case XC_FAMILY_HYB_MGGA: #endif if (spin == 1) { - sigma = rho + np * 2; - tau = sigma + np * 3; + sigma = rho + blksize * 2; + tau = sigma + blksize * 3; if (deriv > 0) { vrho = exc + np; vsigma = vrho + np * 2; @@ -390,9 +474,54 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, v4sigmatau3 = v4sigma2tau2 + np * 6*3 ; v4tau4 = v4sigmatau3 + np * 3*4 ; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset * 2; + vsigma += offset * 3; + vtau += offset * 2; + } + if (deriv > 1) { + v2rho2 += offset * 3; + v2rhosigma += offset * 6; + v2sigma2 += offset * 6; + v2rhotau += offset * 4; + v2sigmatau += offset * 6; + v2tau2 += offset * 3; + } + if (deriv > 2) { + v3rho3 += offset * 4 ; + v3rho2sigma += offset * 9 ; + v3rhosigma2 += offset * 12; + v3sigma3 += offset * 10; + v3rho2tau += offset * 6 ; + v3rhosigmatau += offset * 12; + v3rhotau2 += offset * 6 ; + v3sigma2tau += offset * 12; + v3sigmatau2 += offset * 9 ; + v3tau3 += offset * 4 ; + } + if (deriv > 3) { + v4rho4 += offset * 5 ; + v4rho3sigma += offset * 4*3 ; + v4rho2sigma2 += offset * 3*6 ; + v4rhosigma3 += offset * 2*10 ; + v4sigma4 += offset * 15 ; + v4rho3tau += offset * 4*2 ; + v4rho2sigmatau += offset * 3*3*2; + v4rho2tau2 += offset * 3*3 ; + v4rhosigma2tau += offset * 2*6*2; + v4rhosigmatau2 += offset * 2*3*3; + v4rhotau3 += offset * 2*4 ; + v4sigma3tau += offset * 10*2 ; + v4sigma2tau2 += offset * 6*3 ; + v4sigmatau3 += offset * 3*4 ; + v4tau4 += offset * 5 ; + } } else { - sigma = rho + np; - tau = sigma + np; + sigma = rho + blksize; + tau = sigma + blksize; if (deriv > 0) { vrho = exc + np; vsigma = vrho + np; @@ -435,8 +564,53 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, v4sigmatau3 = v4sigma2tau2 + np; v4tau4 = v4sigmatau3 + np; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset; 
+ vsigma += offset; + vtau += offset; + } + if (deriv > 1) { + v2rho2 += offset; + v2rhosigma += offset; + v2sigma2 += offset; + v2rhotau += offset; + v2sigmatau += offset; + v2tau2 += offset; + } + if (deriv > 2) { + v3rho3 += offset; + v3rho2sigma += offset; + v3rhosigma2 += offset; + v3sigma3 += offset; + v3rho2tau += offset; + v3rhosigmatau += offset; + v3rhotau2 += offset; + v3sigma2tau += offset; + v3sigmatau2 += offset; + v3tau3 += offset; + } + if (deriv > 3) { + v4rho4 += offset; + v4rho3sigma += offset; + v4rho2sigma2 += offset; + v4rhosigma3 += offset; + v4sigma4 += offset; + v4rho3tau += offset; + v4rho2sigmatau += offset; + v4rho2tau2 += offset; + v4rhosigma2tau += offset; + v4rhosigmatau2 += offset; + v4rhotau3 += offset; + v4sigma3tau += offset; + v4sigma2tau2 += offset; + v4sigmatau3 += offset; + v4tau4 += offset; + } } - xc_mgga(func_x, np, rho, sigma, lapl, tau, + xc_mgga(func_x, blksize, rho, sigma, lapl, tau, exc, vrho, vsigma, vlapl, vtau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, @@ -705,6 +879,7 @@ static void axpy(double *dst, double *src, double fac, { int i, j; for (j = 0; j < nsrc; j++) { + #pragma omp parallel for schedule(static) for (i = 0; i < np; i++) { dst[j*np+i] += fac * src[i*nsrc+j]; } @@ -760,6 +935,7 @@ static void merge_xc(double *dst, double *ebuf, double fac, pout = dst + offsets1[order] * np; pin = ebuf + offsets0[order] * np; nsrc = offsets0[order+1] - offsets0[order]; + #pragma omp parallel for schedule(static) for (i = 0; i < np * nsrc; i++) { pout[i] += fac * pin[i]; } @@ -802,10 +978,36 @@ void LIBXC_eval_xc(int nfn, int *fn_id, double *fac, double *omega, { assert(deriv <= 4); double *ebuf = malloc(sizeof(double) * np * outlen); - double *rho = malloc(sizeof(double) * np * 7); - _eval_rho(rho, rho_u, spin, nvar, np); - int nspin = spin + 1; + double *rhobufs[MAX_THREADS]; + int offsets[MAX_THREADS+1]; +#pragma omp parallel +{ + int iblk = 
omp_get_thread_num(); + int nblk = omp_get_num_threads(); + assert(nblk <= MAX_THREADS); + + int blksize = np / nblk; + int ioff = iblk * blksize; + int np_mod = np % nblk; + if (iblk < np_mod) { + blksize += 1; + } + if (np_mod > 0) { + ioff += MIN(iblk, np_mod); + } + offsets[iblk] = ioff; + if (iblk == nblk-1) { + offsets[nblk] = np; + assert(ioff + blksize == np); + } + + double *rho_priv = malloc(sizeof(double) * blksize * 7); + rhobufs[iblk] = rho_priv; + _eval_rho(rho_priv, rho_u+ioff, spin, nvar, blksize, np); +} + + int nspin = spin + 1; int i, j; xc_func_type func; for (i = 0; i < nfn; i++) { @@ -857,13 +1059,25 @@ void LIBXC_eval_xc(int nfn, int *fn_id, double *fac, double *omega, #if defined XC_SET_RELATIVITY xc_lda_x_set_params(&func, relativity); #endif - _eval_xc(&func, spin, deriv, np, rho, ebuf); + +#pragma omp parallel +{ + int iblk = omp_get_thread_num(); + int offset = offsets[iblk]; + int blksize = offsets[iblk+1] - offset; + _eval_xc(&func, spin, deriv, np, rhobufs[iblk], ebuf, offset, blksize); +} + merge_xc(output, ebuf, fac[i], spin, deriv, nvar, np, outlen, func.info->family); xc_func_end(&func); } free(ebuf); - free(rho); +#pragma omp parallel +{ + int iblk = omp_get_thread_num(); + free(rhobufs[iblk]); +} } int LIBXC_max_deriv_order(int xc_id) diff --git a/pyscf/lib/dft/multigrid.c b/pyscf/lib/dft/multigrid.c new file mode 100644 index 0000000000..593aedf1b8 --- /dev/null +++ b/pyscf/lib/dft/multigrid.c @@ -0,0 +1,744 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include +#include "config.h" +#include "cint.h" +#include "pbc/neighbor_list.h" +#include "pbc/cell.h" +#include "dft/multigrid.h" + +#define SQUARE(r) (r[0]*r[0]+r[1]*r[1]+r[2]*r[2]) +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define BUF_SIZE 2000 +#define ADD_SIZE 1000 +#define RZERO 1e-6 + +const int _LEN_CART[] = { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105, 120, 136 +}; + +const int _LEN_CART0[] = { + 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105, 120 +}; + +const int _BINOMIAL_COEF[] = { + 1, + 1, 1, + 1, 2, 1, + 1, 3, 3, 1, + 1, 4, 6, 4, 1, + 1, 5, 10, 10, 5, 1, + 1, 6, 15, 20, 15, 6, 1, + 1, 7, 21, 35, 35, 21, 7, 1, + 1, 8, 28, 56, 70, 56, 28, 8, 1, + 1, 9, 36, 84, 126, 126, 84, 36, 9, 1, + 1, 10, 45, 120, 210, 252, 210, 120, 45, 10, 1, + 1, 11, 55, 165, 330, 462, 462, 330, 165, 55, 11, 1, + 1, 12, 66, 220, 495, 792, 924, 792, 495, 220, 66, 12, 1, + 1, 13, 78, 286, 715,1287,1716,1716,1287, 715, 286, 78, 13, 1, + 1, 14, 91, 364,1001,2002,3003,3432,3003,2002,1001, 364, 91, 14, 1, + 1, 15, 105, 455,1365,3003,5005,6435,6435,5005,3003,1365, 455, 105, 15, 1, +}; + +double CINTsquare_dist(const double *r1, const double *r2); + +void init_gridlevel_info(GridLevel_Info** gridlevel_info, + double* cutoff, int* mesh, int nlevels, double rel_cutoff) +{ + GridLevel_Info* gl_info = (GridLevel_Info*) malloc(sizeof(GridLevel_Info)); + gl_info->nlevels = nlevels; + gl_info->rel_cutoff = rel_cutoff; + gl_info->cutoff = (double*) malloc(sizeof(double) * nlevels); + gl_info->mesh = (int*) malloc(sizeof(int) * nlevels * 3); + int i; + for (i = 0; i < nlevels; i++) { + (gl_info->cutoff)[i] = cutoff[i]; + (gl_info->mesh)[i*3] = mesh[i*3]; + (gl_info->mesh)[i*3+1] = mesh[i*3+1]; + (gl_info->mesh)[i*3+2] = mesh[i*3+2]; + } + *gridlevel_info = gl_info; +} + + +void init_rs_grid(RS_Grid** 
rs_grid, GridLevel_Info** gridlevel_info, int comp) +{ + RS_Grid* rg = (RS_Grid*) malloc(sizeof(RS_Grid)); + GridLevel_Info* gl_info = *gridlevel_info; + int nlevels = gl_info->nlevels; + rg->nlevels = nlevels; + rg->gridlevel_info = gl_info; + rg->comp = comp; + + int i; + size_t ngrid; + int *mesh = gl_info->mesh; + rg->data = (double**)malloc(sizeof(double*) * nlevels); + for (i = 0; i < nlevels; i++) { + ngrid = mesh[i*3] * mesh[i*3+1] * mesh[i*3+2]; + (rg->data)[i] = calloc(comp*ngrid, sizeof(double)); + } + *rs_grid = rg; +} + + +void del_rs_grid(RS_Grid** rs_grid) +{ + RS_Grid* rg = *rs_grid; + if (!rg) { + return; + } + if (rg->data) { + int i; + for (i = 0; i < rg->nlevels; i++) { + if (rg->data[i]) { + free(rg->data[i]); + } + } + free(rg->data); + } + rg->gridlevel_info = NULL; + free(rg); + *rs_grid = NULL; +} + + +void del_gridlevel_info(GridLevel_Info** gridlevel_info) +{ + GridLevel_Info* gl_info = *gridlevel_info; + if (!gl_info) { + return; + } + if (gl_info->cutoff) { + free(gl_info->cutoff); + } + if (gl_info->mesh) { + free(gl_info->mesh); + } + free(gl_info); + *gridlevel_info = NULL; +} + + +void init_pgfpair(PGFPair** pair_info, + int ish, int ipgf, int jsh, int jpgf, int iL, double radius) +{ + PGFPair *pair0 = (PGFPair*) malloc(sizeof(PGFPair)); + pair0->ish = ish; + pair0->ipgf = ipgf; + pair0->jsh = jsh; + pair0->jpgf = jpgf; + pair0->iL = iL; + pair0->radius = radius; + *pair_info = pair0; +} + + +bool pgfpairs_with_same_shells(PGFPair *pair1, PGFPair *pair2) +{ + if (!pair1 || !pair2) { + return false; + } + if (pair1->ish == pair2->ish && pair1->jsh == pair2->jsh) { + return true; + } + return false; +} + + +double pgfpair_radius(int la, int lb, double zeta, double zetb, double* ra, double* rab, double precision) +{ + double radius = 0; + double zetp = zeta + zetb; + double eps = precision * precision; + + if (rab[0] < RZERO && rab[1] < RZERO && rab[2] < RZERO) { + radius = pgf_rcut(la+lb, zetp, 1., eps, radius); + return radius; + } + 
+ double prefactor = exp(-zeta*zetb/zetp*SQUARE(rab)); + double rb[3], rp[3]; + rb[0] = ra[0] + rab[0]; + rb[1] = ra[1] + rab[1]; + rb[2] = ra[2] + rab[2]; + rp[0] = ra[0] + zetb/zetp*rab[0]; + rp[1] = ra[1] + zetb/zetp*rab[1]; + rp[2] = ra[2] + zetb/zetp*rab[2]; + + double rad_a = sqrt(CINTsquare_dist(ra, rp)); + double rad_b = sqrt(CINTsquare_dist(rb, rp)); + + int lmax = la + lb; + double coef[lmax+1]; + double rap[la+1]; + double rbp[lb+1]; + + int lxa, lxb, i; + for (i = 0; i <= lmax; i++) { + coef[i] = 0; + } + rap[0] = 1.; + for (i = 1; i <= la; i++) { + rap[i] = rap[i-1] * rad_a; + } + rbp[0] = 1.; + for (i = 1; i <= lb; i++) { + rbp[i] = rbp[i-1] * rad_b; + } + + for (lxa = 0; lxa <= la; lxa++) { + for (lxb = 0; lxb <= lb; lxb++) { + coef[lxa+lxb] += BINOMIAL(la, lxa) * BINOMIAL(lb, lxb) * rap[la-lxa] * rbp[lb-lxb]; + } + } + + for (i = 0; i <= lmax; i++){ + coef[i] *= prefactor; + radius = MAX(radius, pgf_rcut(i, zetp, coef[i], eps, radius)); + } + return radius; +} + + +void del_pgfpair(PGFPair** pair_info) +{ + PGFPair *pair0 = *pair_info; + if (!pair0) { + return; + } else { + free(pair0); + } + *pair_info = NULL; +} + + +//unlink the pgfpair data instead of deleting +void nullify_pgfpair(PGFPair** pair_info) +{ + *pair_info = NULL; +} + + +void init_task(Task** task) +{ + Task *t0 = *task = (Task*) malloc(sizeof(Task)); + t0->ntasks = 0; + t0->buf_size = BUF_SIZE; + t0->pgfpairs = (PGFPair**) malloc(sizeof(PGFPair*) * t0->buf_size); + int i; + for (i = 0; i < t0->buf_size; i++) { + (t0->pgfpairs)[i] = NULL; + } +} + + +void del_task(Task** task) +{ + Task *t0 = *task; + if (!t0) { + return; + } + if (t0->pgfpairs) { + size_t i, ntasks = t0->ntasks; + for (i = 0; i < ntasks; i++) { + del_pgfpair(t0->pgfpairs + i); + } + free(t0->pgfpairs); + } + free(t0); + *task = NULL; +} + + +void nullify_task(Task** task) +{ + Task *t0 = *task; + if (!t0) { + return; + } + if (t0->pgfpairs) { + size_t i, ntasks = t0->ntasks; + for (i = 0; i < ntasks; i++) { + 
nullify_pgfpair(t0->pgfpairs + i); + } + free(t0->pgfpairs); + } + free(t0); + *task = NULL; +} + + +void init_task_list(TaskList** task_list, GridLevel_Info* gridlevel_info, int nlevels, int hermi) +{ + TaskList* tl = *task_list = (TaskList*) malloc(sizeof(TaskList)); + tl->nlevels = nlevels; + tl->hermi = hermi; + tl->gridlevel_info = gridlevel_info; + tl->tasks = (Task**) malloc(sizeof(Task*)*nlevels); + int i; + for (i = 0; i < nlevels; i++) { + init_task(tl->tasks + i); + } +} + + +void del_task_list(TaskList** task_list) +{ + TaskList *tl = *task_list; + if (!tl) { + return; + } + if (tl->gridlevel_info) { + del_gridlevel_info(&(tl->gridlevel_info)); + tl->gridlevel_info = NULL; + } + if (tl->tasks) { + int i; + for (i = 0; i < tl->nlevels; i++) { + if ((tl->tasks)[i]) { + del_task(tl->tasks + i); + } + } + free(tl->tasks); + } + free(tl); + *task_list = NULL; +} + + +void nullify_task_list(TaskList** task_list) +{ + TaskList *tl = *task_list; + if (!tl) { + return; + } + if (tl->gridlevel_info) { + tl->gridlevel_info = NULL; + } + if (tl->tasks) { + int i; + for (i = 0; i < tl->nlevels; i++) { + if ((tl->tasks)[i]) { + nullify_task(tl->tasks + i); + } + } + free(tl->tasks); + } + free(tl); + *task_list = NULL; +} + + +void update_task_list(TaskList** task_list, int grid_level, + int ish, int ipgf, int jsh, int jpgf, int iL, double radius) +{ + TaskList* tl = *task_list; + Task *t0 = (tl->tasks)[grid_level]; + t0->ntasks += 1; + if (t0->ntasks > t0->buf_size) { + t0->buf_size += ADD_SIZE; + t0->pgfpairs = (PGFPair**) realloc(t0->pgfpairs, sizeof(PGFPair*) * t0->buf_size); + } + init_pgfpair(t0->pgfpairs + t0->ntasks - 1, + ish, ipgf, jsh, jpgf, iL, radius); +} + + +void merge_task_list(TaskList** task_list, TaskList** task_list_loc) +{ + TaskList* tl = *task_list; + TaskList* tl_loc = *task_list_loc; + int ilevel, itask; + for (ilevel = 0; ilevel < tl->nlevels; ilevel++) { + Task *t0 = (tl->tasks)[ilevel]; + Task *t1 = (tl_loc->tasks)[ilevel]; + int itask_off 
= t0->ntasks; + int ntasks_loc = t1->ntasks; + t0->ntasks += ntasks_loc; + t0->buf_size = t0->ntasks; + t0->pgfpairs = (PGFPair**) realloc(t0->pgfpairs, sizeof(PGFPair*) * t0->buf_size); + PGFPair** ptr_pgfpairs = t0->pgfpairs + itask_off; + PGFPair** ptr_pgfpairs_loc = t1->pgfpairs; + for (itask = 0; itask < ntasks_loc; itask++) { + ptr_pgfpairs[itask] = ptr_pgfpairs_loc[itask]; + } + } +} + + +int get_grid_level(GridLevel_Info* gridlevel_info, double alpha) +{ + int i; + int nlevels = gridlevel_info->nlevels; + int grid_level = nlevels - 1; //default use the most dense grid + double needed_cutoff = alpha * gridlevel_info->rel_cutoff; + for (i = 0; i < nlevels; i++) { + if ((gridlevel_info->cutoff)[i] >= needed_cutoff) { + grid_level = i; + break; + } + } + return grid_level; +} + + +void build_task_list(TaskList** task_list, NeighborList** neighbor_list, + GridLevel_Info** gridlevel_info, + int* ish_atm, int* ish_bas, double* ish_env, + double* ish_rcut, double** ipgf_rcut, + int* jsh_atm, int* jsh_bas, double* jsh_env, + double* jsh_rcut, double** jpgf_rcut, + int nish, int njsh, double* Ls, double precision, int hermi) +{ + GridLevel_Info *gl_info = *gridlevel_info; + int ilevel; + int nlevels = gl_info->nlevels; + init_task_list(task_list, gl_info, nlevels, hermi); + double max_radius[nlevels]; + NeighborList *nl0 = *neighbor_list; + +#pragma omp parallel private(ilevel) +{ + double max_radius_loc[nlevels]; + TaskList** task_list_loc = (TaskList**) malloc(sizeof(TaskList*)); + init_task_list(task_list_loc, gl_info, nlevels, hermi); + NeighborPair *np0_ij; + int ish, jsh; + int li, lj; + int ipgf, jpgf; + int nipgf, njpgf; + int iL, iL_idx; + int ish_atm_id, jsh_atm_id; + int ish_alpha_of, jsh_alpha_of; + double ipgf_alpha, jpgf_alpha; + double *ish_ratm, *jsh_ratm, *rL; + double rij[3]; + double dij, radius; + + #pragma omp for schedule(dynamic) + for (ish = 0; ish < nish; ish++) { + li = ish_bas[ANG_OF+ish*BAS_SLOTS]; + nipgf = 
ish_bas[NPRIM_OF+ish*BAS_SLOTS]; + ish_atm_id = ish_bas[ish*BAS_SLOTS+ATOM_OF]; + ish_ratm = ish_env + ish_atm[ish_atm_id*ATM_SLOTS+PTR_COORD]; + ish_alpha_of = ish_bas[PTR_EXP+ish*BAS_SLOTS]; + for (jsh = 0; jsh < njsh; jsh++) { + if (hermi == 1 && jsh < ish) { + continue; + } + np0_ij = (nl0->pairs)[ish*njsh + jsh]; + if (np0_ij->nimgs > 0) { + lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + njpgf = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS]; + jsh_atm_id = jsh_bas[jsh*BAS_SLOTS+ATOM_OF]; + jsh_ratm = jsh_env + jsh_atm[jsh_atm_id*ATM_SLOTS+PTR_COORD]; + jsh_alpha_of = jsh_bas[PTR_EXP+jsh*BAS_SLOTS]; + + for (iL_idx = 0; iL_idx < np0_ij->nimgs; iL_idx++){ + iL = (np0_ij->Ls_list)[iL_idx]; + rL = Ls + iL*3; + rij[0] = jsh_ratm[0] + rL[0] - ish_ratm[0]; + rij[1] = jsh_ratm[1] + rL[1] - ish_ratm[1]; + rij[2] = jsh_ratm[2] + rL[2] - ish_ratm[2]; + dij = sqrt(SQUARE(rij)); + + for (ipgf = 0; ipgf < nipgf; ipgf++) { + if (ipgf_rcut[ish][ipgf] + jsh_rcut[jsh] < dij) { + continue; + } + ipgf_alpha = ish_env[ish_alpha_of+ipgf]; + for (jpgf = 0; jpgf < njpgf; jpgf++) { + //if (hermi == 1 && ish == jsh && jpgf < ipgf) { + // continue; + //} + if (ipgf_rcut[ish][ipgf] + jpgf_rcut[jsh][jpgf] < dij) { + continue; + } + jpgf_alpha = jsh_env[jsh_alpha_of+jpgf]; + ilevel = get_grid_level(gl_info, ipgf_alpha+jpgf_alpha); + radius = pgfpair_radius(li, lj, ipgf_alpha, jpgf_alpha, ish_ratm, rij, precision); + if (radius < RZERO) { + continue; + } + max_radius_loc[ilevel] = MAX(radius, max_radius_loc[ilevel]); + update_task_list(task_list_loc, ilevel, ish, ipgf, jsh, jpgf, iL, radius); + } + } + } + } + } + } + + #pragma omp critical + merge_task_list(task_list, task_list_loc); + + nullify_task_list(task_list_loc); + free(task_list_loc); + + #pragma omp critical + for (ilevel = 0; ilevel < nlevels; ilevel++) { + max_radius[ilevel] = MAX(max_radius[ilevel], max_radius_loc[ilevel]); + } +} + + for (ilevel = 0; ilevel < nlevels; ilevel++) { + Task *t0 = ((*task_list)->tasks)[ilevel]; + t0->radius = 
max_radius[ilevel]; + } +} + + +int get_task_loc(int** task_loc, PGFPair** pgfpairs, int ntasks, + int ish0, int ish1, int jsh0, int jsh1, int hermi) +{ + int n = -2; + int ish_prev = -1; + int jsh_prev = -1; + int itask, ish, jsh; + int *buf = (int*)malloc(sizeof(int) * ntasks*2); + PGFPair *pgfpair; + for(itask = 0; itask < ntasks; itask++){ + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + jsh = pgfpair->jsh; + if (ish < ish0 || ish >= ish1) { + continue; + } + if (jsh < jsh0 || jsh >= jsh1) { + continue; + } + if (hermi == 1 && jsh < ish) { + continue; + } + + if (ish != ish_prev || jsh != jsh_prev) { + n += 2; + buf[n] = itask; + buf[n+1] = itask+1; + ish_prev = ish; + jsh_prev = jsh; + } else { + buf[n+1] = itask+1; + } + } + n += 2; + *task_loc = (int*)realloc(buf, sizeof(int) * n); + return n; +} + + +void gradient_gs(double complex* out, double complex* f_gs, double* Gv, + int n, size_t ng) +{ + int i; + double complex *outx, *outy, *outz; + for (i = 0; i < n; i++) { + outx = out; + outy = outx + ng; + outz = outy + ng; + #pragma omp parallel + { + size_t igrid; + double *pGv; + #pragma omp for schedule(static) + for (igrid = 0; igrid < ng; igrid++) { + pGv = Gv + igrid * 3; + outx[igrid] = pGv[0] * creal(f_gs[igrid]) * _Complex_I - pGv[0] * cimag(f_gs[igrid]); + outy[igrid] = pGv[1] * creal(f_gs[igrid]) * _Complex_I - pGv[1] * cimag(f_gs[igrid]); + outz[igrid] = pGv[2] * creal(f_gs[igrid]) * _Complex_I - pGv[2] * cimag(f_gs[igrid]); + } + } + f_gs += ng; + out += 3 * ng; + } +} + +/* +int get_task_loc_diff_ish(int** task_loc, PGFPair** pgfpairs, int ntasks, + int ish0, int ish1) +{ + int n = -2; + int ish_prev = -1; + int itask, ish; + int *buf = (int*)malloc(sizeof(int) * ntasks*2); + PGFPair *pgfpair; + for(itask = 0; itask < ntasks; itask++){ + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + if (ish < ish0 || ish >= ish1) { + continue; + } + + if (ish != ish_prev) { + n += 2; + buf[n] = itask; + ish_prev = ish; + } + if (ish == ish_prev) { + 
buf[n+1] = itask+1; + } + } + n += 2; + *task_loc = (int*)realloc(buf, sizeof(int) * n); + return n; +} +*/ + +/* +typedef struct Task_Index_struct { + int ntasks; + int bufsize; + int* task_index; +} Task_Index; + + +void init_task_index(Task_Index* task_idx) +{ + task_idx->ntasks = 0; + task_idx->bufsize = 10; + task_idx->task_index = (int*)malloc(sizeof(int) * task_idx->bufsize); +} + + +void update_task_index(Task_Index* task_idx, int itask) +{ + task_idx->ntasks += 1; + if (task_idx->bufsize < task_idx->ntasks) { + task_idx->bufsize += 10; + task_idx->task_index = (int*)realloc(task_idx->task_index, sizeof(int) * task_idx->bufsize); + } + task_idx->task_index[task_idx->ntasks-1] = itask; +} + + +void del_task_index(Task_Index* task_idx) +{ + if (!task_idx) { + return; + } + if (task_idx->task_index) { + free(task_idx->task_index); + } + task_idx->ntasks = 0; + task_idx->bufsize = 0; +} + + +typedef struct Shlpair_Task_Index_struct { + int nish; + int njsh; + int ish0; + int jsh0; + Task_Index *task_index; +} Shlpair_Task_Index; + + +void init_shlpair_task_index(Shlpair_Task_Index* shlpair_task_idx, + int ish0, int jsh0, int nish, int njsh) +{ + shlpair_task_idx->ish0 = ish0; + shlpair_task_idx->jsh0 = jsh0; + shlpair_task_idx->nish = nish; + shlpair_task_idx->njsh = njsh; + shlpair_task_idx->task_index = (Task_Index*)malloc(sizeof(Task_Index)*nish*njsh); + + int ijsh; + for (ijsh = 0; ijsh < nish*njsh; ijsh++) { + init_task_index(shlpair_task_idx->task_index + ijsh); + } +} + + +void update_shlpair_task_index(Shlpair_Task_Index* shlpair_task_idx, + int ish, int jsh, int itask) +{ + int ish0 = shlpair_task_idx->ish0; + int jsh0 = shlpair_task_idx->jsh0; + int njsh = shlpair_task_idx->njsh; + int ioff = ish - ish0; + int joff = jsh - jsh0; + + update_task_index(shlpair_task_idx->task_index + ioff*njsh+joff, itask); +} + + +int get_task_index(Shlpair_Task_Index* shlpair_task_idx, int** idx, int ish, int jsh) +{ + int ish0 = shlpair_task_idx->ish0; + int jsh0 = 
shlpair_task_idx->jsh0; + int njsh = shlpair_task_idx->njsh; + int ioff = ish - ish0; + int joff = jsh - jsh0; + Task_Index *task_idx = shlpair_task_idx->task_index + ioff*njsh+joff; + int ntasks = task_idx->ntasks; + *idx = task_idx->task_index; + return ntasks; +} + + +void del_shlpair_task_index(Shlpair_Task_Index* shlpair_task_idx) +{ + if (!shlpair_task_idx) { + return; + } + + int nish = shlpair_task_idx->nish; + int njsh = shlpair_task_idx->njsh; + int ijsh; + for (ijsh = 0; ijsh < nish*njsh; ijsh++) { + del_task_index(shlpair_task_idx->task_index + ijsh); + } + free(shlpair_task_idx->task_index); +} + + +Shlpair_Task_Index* get_shlpair_task_index(PGFPair** pgfpairs, int ntasks, + int ish0, int ish1, int jsh0, int jsh1, int hermi) +{ + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + + Shlpair_Task_Index* shlpair_task_idx = (Shlpair_Task_Index*) malloc(sizeof(Shlpair_Task_Index)); + init_shlpair_task_index(shlpair_task_idx, ish0, jsh0, nish, njsh); + + int itask; + int ish, jsh; + PGFPair *pgfpair = NULL; + for(itask = 0; itask < ntasks; itask++){ + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + if (ish < ish0 || ish >= ish1) { + continue; + } + jsh = pgfpair->jsh; + if (jsh < jsh0 || jsh >= jsh1) { + continue; + } + if (hermi == 1 && jsh < ish) { + continue; + } + update_shlpair_task_index(shlpair_task_idx, ish, jsh, itask); + } + return shlpair_task_idx; +} +*/ diff --git a/pyscf/lib/dft/multigrid.h b/pyscf/lib/dft/multigrid.h new file mode 100644 index 0000000000..e691a3ce12 --- /dev/null +++ b/pyscf/lib/dft/multigrid.h @@ -0,0 +1,72 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#ifndef HAVE_DEFINED_MULTIGRID_H +#define HAVE_DEFINED_MULTIGRID_H + +#include + +#define BINOMIAL(n, i) (_BINOMIAL_COEF[_LEN_CART0[n]+i]) + +extern const int _LEN_CART[]; +extern const int _LEN_CART0[]; +extern const int _BINOMIAL_COEF[]; + +typedef struct GridLevel_Info_struct { + int nlevels; + double rel_cutoff; + double *cutoff; + int *mesh; +} GridLevel_Info; + +typedef struct RS_Grid_struct { + int nlevels; + GridLevel_Info* gridlevel_info; + int comp; + double** data; +} RS_Grid; + +typedef struct PGFPair_struct { + int ish; + int ipgf; + int jsh; + int jpgf; + int iL; + double radius; +} PGFPair; + +bool pgfpairs_with_same_shells(PGFPair*, PGFPair*); + +typedef struct Task_struct { + size_t buf_size; + size_t ntasks; + PGFPair** pgfpairs; + double radius; +} Task; + +typedef struct TaskList_struct { + int nlevels; + int hermi; + GridLevel_Info* gridlevel_info; + Task** tasks; +} TaskList; + + +int get_task_loc(int** task_loc, PGFPair** pgfpairs, int ntasks, + int ish0, int ish1, int jsh0, int jsh1, int hermi); +#endif diff --git a/pyscf/lib/dft/utils.c b/pyscf/lib/dft/utils.c new file mode 100644 index 0000000000..04ef8e5b2f --- /dev/null +++ b/pyscf/lib/dft/utils.c @@ -0,0 +1,62 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include "config.h" +#include "vhf/fblas.h" +#if defined(HAVE_LIBXSMM) +#include "libxsmm.h" +#endif + + +void dgemm_wrapper(const char transa, const char transb, + const int m, const int n, const int k, + const double alpha, const double* a, const int lda, + const double* b, const int ldb, + const double beta, double* c, const int ldc) +{ +#if defined(HAVE_LIBXSMM) + if (transa == 'N') { + //libxsmm_dgemm(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + int prefetch = LIBXSMM_PREFETCH_AUTO; + int flags = transb != 'T' ? LIBXSMM_GEMM_FLAG_NONE : LIBXSMM_GEMM_FLAG_TRANS_B; + libxsmm_dmmfunction kernel = libxsmm_dmmdispatch(m, n, k, &lda, &ldb, &ldc, + &alpha, &beta, &flags, &prefetch); + if (kernel) { + kernel(a,b,c,a,b,c); + return; + } + } +#endif + dgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); +} + +void get_gga_vrho_gs(double complex *out, double complex *vrho_gs, double complex *vsigma1_gs, + double *Gv, double weight, int ngrid) +{ + int i; + int ngrid2 = 2 * ngrid; + double complex fac = -2. * _Complex_I; + #pragma omp parallel for simd schedule(static) + for (i = 0; i < ngrid; i++) { + out[i] = ( Gv[i*3] * vsigma1_gs[i] + +Gv[i*3+1] * vsigma1_gs[i+ngrid] + +Gv[i*3+2] * vsigma1_gs[i+ngrid2]) * fac + vrho_gs[i]; + out[i] *= weight; + } +} diff --git a/pyscf/lib/dft/utils.h b/pyscf/lib/dft/utils.h new file mode 100644 index 0000000000..1c85ff1fdc --- /dev/null +++ b/pyscf/lib/dft/utils.h @@ -0,0 +1,27 @@ +/* Copyright 2021- The PySCF Developers. 
def ndarray_pointer_2d(array):
    '''Build a table of row addresses for a C-contiguous 2D array.

    The address of row ``i`` is ``array.ctypes.data + i*array.strides[0]``.
    The addresses are stored in a ``numpy.uintp`` array and a
    ``ctypes.c_void_p`` pointing at that table is returned.

    The table stores raw addresses only, so ``array`` must stay alive for
    as long as the returned pointer is used.
    '''
    assert array.ndim == 2
    assert array.flags.c_contiguous

    nrows = array.shape[0]
    row_stride = array.strides[0]
    row_offsets = numpy.arange(nrows) * row_stride
    row_addresses = (array.ctypes.data + row_offsets).astype(numpy.uintp)
    return row_addresses.ctypes.data_as(ctypes.c_void_p)
+ See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include +#include "config.h" +#include "cint.h" +#include "pbc/cell.h" +#include "np_helper/np_helper.h" + +#define SQUARE(r) (r[0]*r[0]+r[1]*r[1]+r[2]*r[2]) + +double pgf_rcut(int l, double alpha, double coeff, double precision, double r0) +{ + l += 2; + + double rcut; + double rmin = sqrt(.5 * l / alpha) * 2.; + double gmax = coeff * pow(rmin, l) * exp(-alpha * rmin * rmin); + if (gmax < precision) { + return rmin; + } + + double eps = MIN(rmin/10, RCUT_EPS); + double c = log(coeff / precision); + double rcut_last; + rcut = MAX(r0, rmin+eps); + + int i; + for (i = 0; i < RCUT_MAX_CYCLE; i++) { + rcut_last = rcut; + rcut = sqrt((l*log(rcut) + c) / alpha); + if (fabs(rcut - rcut_last) < eps) { + break; + } + } + if (i == RCUT_MAX_CYCLE) { + //printf("r0 = %.6e, l = %d, alpha = %.6e, coeff = %.6e, precision=%.6e\n", r0, l, alpha, coeff, precision); + fprintf(stderr, "pgf_rcut did not converge in %d cycles: %.6f > %.6f.\n", + RCUT_MAX_CYCLE, fabs(rcut - rcut_last), eps); + } + return rcut; +} + +void rcut_by_shells(double* shell_radius, double** ptr_pgf_rcut, + int* bas, double* env, int nbas, + double r0, double precision) +{ +#pragma omp parallel +{ + int ib, ic, p; + #pragma omp for schedule(static) + for (ib = 0; ib < nbas; ib ++) { + int l = bas[ANG_OF+ib*BAS_SLOTS]; + int nprim = bas[NPRIM_OF+ib*BAS_SLOTS]; + int ptr_exp = bas[PTR_EXP+ib*BAS_SLOTS]; + int nctr = bas[NCTR_OF+ib*BAS_SLOTS]; + int ptr_c = bas[PTR_COEFF+ib*BAS_SLOTS]; + double rcut_max = 0, rcut; + for (p = 0; p < nprim; p++) { + double alpha = env[ptr_exp+p]; + double cmax = 0; + for (ic = 0; ic < nctr; ic++) { + cmax = MAX(fabs(env[ptr_c+ic*nprim+p]), cmax); + } + rcut = pgf_rcut(l, alpha, cmax, precision, r0); + if (ptr_pgf_rcut) { + ptr_pgf_rcut[ib][p] = rcut; + } + rcut_max = MAX(rcut, rcut_max); + } + shell_radius[ib] = 
rcut_max; + } +} +} + + +static void get_SI_real_imag(double* out_real, double* out_imag, + double* coords, double* Gv, + int natm, size_t ngrid) +{ +#pragma omp parallel +{ + int ia; + size_t i; + double RG; + double *pcoords, *pGv; + double *pout_real, *pout_imag; + #pragma omp for schedule(static) + for (ia = 0; ia < natm; ia++) { + pcoords = coords + ia * 3; + pout_real = out_real + ia * ngrid; + pout_imag = out_imag + ia * ngrid; + for (i = 0; i < ngrid; i++) { + pGv = Gv + i * 3; + RG = pcoords[0] * pGv[0] + pcoords[1] * pGv[1] + pcoords[2] * pGv[2]; + pout_real[i] = cos(RG); + pout_imag[i] = -sin(RG); + } + } +} +} + + +void get_Gv(double* Gv, double* rx, double* ry, double* rz, int* mesh, double* b) +{ +#pragma omp parallel +{ + int x, y, z; + double *pGv; + #pragma omp for schedule(dynamic) + for (x = 0; x < mesh[0]; x++) { + pGv = Gv + x * (size_t)mesh[1] * mesh[2] * 3; + for (y = 0; y < mesh[1]; y++) { + for (z = 0; z < mesh[2]; z++) { + pGv[0] = rx[x] * b[0]; + pGv[0] += ry[y] * b[3]; + pGv[0] += rz[z] * b[6]; + pGv[1] = rx[x] * b[1]; + pGv[1] += ry[y] * b[4]; + pGv[1] += rz[z] * b[7]; + pGv[2] = rx[x] * b[2]; + pGv[2] += ry[y] * b[5]; + pGv[2] += rz[z] * b[8]; + pGv += 3; + }} + } +} +} + + +void ewald_gs_nuc_grad(double* out, double* Gv, double* charges, double* coords, + double ew_eta, double weights, int natm, size_t ngrid) +{ + double *SI_real = (double*) malloc(natm*ngrid*sizeof(double)); + double *SI_imag = (double*) malloc(natm*ngrid*sizeof(double)); + get_SI_real_imag(SI_real, SI_imag, coords, Gv, natm, ngrid); + + double *ZSI_real = calloc(ngrid, sizeof(double)); + double *ZSI_imag = calloc(ngrid, sizeof(double)); + + NPdgemm('N', 'N', ngrid, 1, natm, + ngrid, natm, ngrid, 0, 0, 0, + SI_real, charges, ZSI_real, 1., 0.); + NPdgemm('N', 'N', ngrid, 1, natm, + ngrid, natm, ngrid, 0, 0, 0, + SI_imag, charges, ZSI_imag, 1., 0.); + +#pragma omp parallel +{ + int ia; + size_t i; + double charge_i; + double G2, coulG, tmp; + double *pout, *pGv; + 
double *pSI_real, *pSI_imag; + double fac = 4. * M_PI * weights; + double fac1 = 4. * ew_eta * ew_eta; + + #pragma omp for schedule(static) + for (ia = 0; ia < natm; ia++) { + charge_i = charges[ia]; + pout = out + ia * 3; + pSI_real = SI_real + ia * ngrid; + pSI_imag = SI_imag + ia * ngrid; + #pragma omp simd + for (i = 0; i < ngrid; i++) { + pGv = Gv + i*3; + G2 = SQUARE(pGv); + if (G2 < 1e-12) {continue;} + coulG = fac / G2 * exp(-G2 / fac1); + tmp = coulG * charge_i; + tmp *= (pSI_imag[i] * ZSI_real[i] - pSI_real[i] * ZSI_imag[i]); + pout[0] += tmp * pGv[0]; + pout[1] += tmp * pGv[1]; + pout[2] += tmp * pGv[2]; + } + } +} + free(SI_real); + free(SI_imag); + free(ZSI_real); + free(ZSI_imag); +} + + +void get_ewald_direct(double* ewovrl, double* chargs, double* coords, double* Ls, + double beta, double rcut, int natm, int nL) +{ + *ewovrl = 0.0; + + #pragma omp parallel + { + int i, j, l; + double *ri, *rj, *rL; + double rij[3]; + double r, qi, qj; + double e_loc = 0.0; + #pragma omp for schedule(static) + for (i = 0; i < natm; i++) { + ri = coords + i*3; + qi = chargs[i]; + for (j = 0; j < natm; j++) { + rj = coords + j*3; + qj = chargs[j]; + for (l = 0; l < nL; l++) { + rL = Ls + l*3; + rij[0] = rj[0] + rL[0] - ri[0]; + rij[1] = rj[1] + rL[1] - ri[1]; + rij[2] = rj[2] + rL[2] - ri[2]; + r = sqrt(SQUARE(rij)); + if (r > 1e-10 && r < rcut) { + e_loc += qi * qj * erfc(beta * r) / r; + } + } + } + } + e_loc *= 0.5; + + #pragma omp critical + *ewovrl += e_loc; + } +} + + +void get_ewald_direct_nuc_grad(double* out, double* chargs, double* coords, double* Ls, + double beta, double rcut, int natm, int nL) +{ + double fac = 2. 
* beta / sqrt(M_PI); + double beta2 = beta * beta; + + #pragma omp parallel + { + int i, j, l; + double *ri, *rj, *rL, *pout; + double rij[3]; + double r, r2, qi, qj, tmp; + #pragma omp for schedule(static) + for (i = 0; i < natm; i++) { + pout = out + i*3; + ri = coords + i*3; + qi = chargs[i]; + for (j = 0; j < natm; j++) { + rj = coords + j*3; + qj = chargs[j]; + for (l = 0; l < nL; l++) { + rL = Ls + l*3; + rij[0] = ri[0] - rj[0] + rL[0]; + rij[1] = ri[1] - rj[1] + rL[1]; + rij[2] = ri[2] - rj[2] + rL[2]; + r2 = SQUARE(rij); + r = sqrt(r2); + if (r > 1e-10 && r < rcut) { + tmp = qi * qj * (erfc(beta * r) / (r2 * r) + fac * exp(-beta2 * r2) / r2); + pout[0] -= tmp * rij[0]; + pout[1] -= tmp * rij[1]; + pout[2] -= tmp * rij[2]; + } + } + } + } + } +} diff --git a/pyscf/lib/pbc/cell.h b/pyscf/lib/pbc/cell.h new file mode 100644 index 0000000000..bec26bb2ea --- /dev/null +++ b/pyscf/lib/pbc/cell.h @@ -0,0 +1,29 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#ifndef HAVE_DEFINED_CELL_H +#define HAVE_DEFINED_CELL_H + +#define RCUT_MAX_CYCLE 10 +#define RCUT_EPS 1e-3 + +double pgf_rcut(int l, double alpha, double coeff, double precision, double r0); +void rcut_by_shells(double* shell_radius, double** ptr_pgf_rcut, + int* bas, double* env, int nbas, + double r0, double precision); +#endif diff --git a/pyscf/lib/pbc/fft.c b/pyscf/lib/pbc/fft.c new file mode 100644 index 0000000000..3affbb9a02 --- /dev/null +++ b/pyscf/lib/pbc/fft.c @@ -0,0 +1,147 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include "config.h" + +#define BLKSIZE 128 +#define MAX(x, y) (((x) > (y)) ? 
(x) : (y)) + +fftw_plan fft_create_r2c_plan(double* in, complex double* out, int rank, int* mesh) +{ + fftw_plan p; + p = fftw_plan_dft_r2c(rank, mesh, in, out, FFTW_ESTIMATE); + return p; +} + +fftw_plan fft_create_c2r_plan(complex double* in, double* out, int rank, int* mesh) +{ + fftw_plan p; + p = fftw_plan_dft_c2r(rank, mesh, in, out, FFTW_ESTIMATE); + return p; +} + +void fft_execute(fftw_plan p) +{ + fftw_execute(p); +} + +void fft_destroy_plan(fftw_plan p) +{ + fftw_destroy_plan(p); +} + +void _complex_fft(complex double* in, complex double* out, int* mesh, int rank, int sign) +{ + int i; + int nx = mesh[0]; + int nyz = 1; + for (i = 1; i < rank; i++) { + nyz *= mesh[i]; + } + int nmax = nyz / BLKSIZE * BLKSIZE; + fftw_plan p_2d = fftw_plan_dft(rank-1, mesh+1, in, out, sign, FFTW_ESTIMATE); + int nn[BLKSIZE] = {nx}; + fftw_plan p_3d_x = fftw_plan_many_dft(1, nn, BLKSIZE, + out, NULL, nyz, 1, + out, NULL, nyz, 1, + sign, FFTW_ESTIMATE); + + #pragma omp parallel private(i) + { + int off; + #pragma omp for schedule(dynamic) + for (i = 0; i < nx; i++) { + off = i * nyz; + fftw_execute_dft(p_2d, in+off, out+off); + } + + #pragma omp for schedule(dynamic) + for (i = 0; i < nmax; i+=BLKSIZE) { + fftw_execute_dft(p_3d_x, out+i, out+i); + } + } + fftw_destroy_plan(p_2d); + fftw_destroy_plan(p_3d_x); + + int nres = nyz - nmax; + if (nres > 0) { + fftw_plan p_3d_x = fftw_plan_many_dft(1, nn, nres, + out+nmax, NULL, nyz, 1, + out+nmax, NULL, nyz, 1, + sign, FFTW_ESTIMATE); + fftw_execute(p_3d_x); + fftw_destroy_plan(p_3d_x); + } +} + +void fft(complex double* in, complex double* out, int* mesh, int rank) +{ + _complex_fft(in, out, mesh, rank, FFTW_FORWARD); +} + +void ifft(complex double* in, complex double* out, int* mesh, int rank) +{ + _complex_fft(in, out, mesh, rank, FFTW_BACKWARD); + size_t i, n = 1; + for (i = 0; i < rank; i++) { + n *= mesh[i]; + } + double fac = 1. 
/*
 * Widen n real values to complex: out[k] = in[k] + 0*i for k in [0, n).
 * The loop is split statically over the OpenMP threads.
 */
void _copy_d2z(double complex *out, const double *in, const size_t n)
{
    size_t k;
    #pragma omp parallel for schedule(static)
    for (k = 0; k < n; k++) {
        out[k] = in[k] + 0*_Complex_I;
    }
}
+ + * + * Author: Xing Zhang + */ + +#include + +#define FFT_PLAN fftw_plan + +FFT_PLAN fft_create_r2c_plan(double* in, complex double* out, int rank, int* mesh); +FFT_PLAN fft_create_c2r_plan(complex double* in, double* out, int rank, int* mesh); +void fft_execute(FFT_PLAN p); +void fft_destroy_plan(FFT_PLAN p); diff --git a/pyscf/lib/pbc/fill_ints.c b/pyscf/lib/pbc/fill_ints.c index 36c853724c..95857b19ee 100644 --- a/pyscf/lib/pbc/fill_ints.c +++ b/pyscf/lib/pbc/fill_ints.c @@ -1260,9 +1260,9 @@ static void shift_bas(double *env_loc, double *env, double *Ls, int ptr, int iL) env_loc[ptr+2] = env[ptr+2] + Ls[iL*3+2]; } -static void sort2c_ks1(double complex *out, double *bufr, double *bufi, - int *shls_slice, int *ao_loc, int nkpts, int comp, - int jsh, int msh0, int msh1) +void sort2c_ks1(double complex *out, double *bufr, double *bufi, + int *shls_slice, int *ao_loc, int nkpts, int comp, + int jsh, int msh0, int msh1) { const int ish0 = shls_slice[0]; const int ish1 = shls_slice[1]; diff --git a/pyscf/lib/pbc/fill_ints.h b/pyscf/lib/pbc/fill_ints.h new file mode 100644 index 0000000000..ec2000755e --- /dev/null +++ b/pyscf/lib/pbc/fill_ints.h @@ -0,0 +1,29 @@ +/* Copyright 2014-2024 The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + */ + +#ifndef HAVE_DEFINED_PBC_FILL_INTS_H +#define HAVE_DEFINED_PBC_FILL_INTS_H + +void sort2c_gs1(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh); +void sort2c_gs2_igtj(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh); +void sort2c_gs2_ieqj(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh); +void sort2c_ks1(double complex *out, double *bufr, double *bufi, + int *shls_slice, int *ao_loc, int nkpts, int comp, + int jsh, int msh0, int msh1); +#endif diff --git a/pyscf/lib/pbc/fill_ints_screened.c b/pyscf/lib/pbc/fill_ints_screened.c new file mode 100644 index 0000000000..5d100c7ae3 --- /dev/null +++ b/pyscf/lib/pbc/fill_ints_screened.c @@ -0,0 +1,1012 @@ +/* Copyright 2021-2024 The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
/*
 * Split the shells [ksh0, ksh1) into consecutive blocks using ao_loc.
 *
 * A new block is opened at shell ksh whenever ao_loc[ksh+1] minus the
 * ao_loc offset of the current block's first shell exceeds dkmax.
 * kshloc[0] = ksh0, kshloc[b] is the first shell of block b and
 * kshloc[nblocks] = ksh1, so the caller must provide room for up to
 * ksh1-ksh0+1 entries.  Returns the number of blocks.
 *
 * Every shell after the first is asserted to span fewer than dkmax
 * functions.
 */
static int shloc_partition(int *kshloc, int *ao_loc, int ksh0, int ksh1, int dkmax)
{
    int ksh;
    int nblocks = 1;
    int block_start = ao_loc[ksh0];  /* AO offset of the current block */
    int *next = kshloc;

    *next++ = ksh0;
    for (ksh = ksh0+1; ksh < ksh1; ksh++) {
        const int shell_end = ao_loc[ksh+1];
        assert(shell_end - ao_loc[ksh] < dkmax);
        if (shell_end - block_start > dkmax) {
            *next++ = ksh;
            block_start = ao_loc[ksh];
            nblocks++;
        }
    }
    *next = ksh1;
    return nblocks;
}
/*
 * Evaluate the three-index integrals for one (ish, jsh) shell pair against
 * all shells in [ksh0, ksh1), summed over the lattice images listed in a
 * neighbor list, and pass each finished block to fsort.
 *
 * ish, jsh are given relative to shls_slice[0] and shls_slice[2]; they are
 * converted to absolute shell indices below.
 *
 * buf is used as three consecutive regions of dij*dkmax*comp doubles each
 * (integral scratch, image-summed accumulator bufL) followed by the
 * integral-engine cache; the caller is presumably responsible for sizing it
 * accordingly -- TODO confirm against the driver.
 *
 * nkpts_ij, nkpts, expkL_r, expkL_i and kptij_idx are not referenced in this
 * function body.
 */
static void _nr3c_screened_fill_g(int (*intor)(), void (*fsort)(), double *out, int nkpts_ij,
                     int nkpts, int comp, int nimgs, int ish, int jsh,
                     double *buf, double *env_loc, double *Ls,
                     double *expkL_r, double *expkL_i, int *kptij_idx,
                     int *shls_slice, int *ao_loc,
                     CINTOpt *cintopt, PBCOpt *pbcopt,
                     int *atm, int natm, int *bas, int nbas, double *env,
                     NeighborList** neighbor_list)
{
        const int ish0 = shls_slice[0];
        const int ish1 = shls_slice[1];
        const int jsh0 = shls_slice[2];
        const int jsh1 = shls_slice[3];
        const int ksh0 = shls_slice[4];
        const int ksh1 = shls_slice[5];

        /* relative -> absolute shell indices */
        jsh += jsh0;
        ish += ish0;
        /* offsets in env of the coordinates of the atoms carrying ish / jsh */
        int iptrxyz = atm[PTR_COORD+bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS];
        int jptrxyz = atm[PTR_COORD+bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS];
        const int di = ao_loc[ish+1] - ao_loc[ish];
        const int dj = ao_loc[jsh+1] - ao_loc[jsh];
        const int dij = di * dj;
        /* upper bound on the number of k functions handled per block */
        int dkmax = INTBUFMAX10 / dij / 2 * MIN(IMGBLK,nimgs);
        int kshloc[ksh1-ksh0+1];
        int nkshloc = shloc_partition(kshloc, ao_loc, ksh0, ksh1, dkmax);

        int i, m, msh0, msh1, dijm;
        int ksh, dk, iL, jL, dijkc, ksh_off, jsh_off;
        int shls[3];

        int nshi = ish1 - ish0;
        int nshj = jsh1 - jsh0;
        int nshij = nshi + nshj;
        int idx_i, idx_j;

        int dijmc = dij * dkmax * comp;
        /* buf: scratch for one intor call; bufL: sum over images; cache: intor workspace */
        double *bufL = buf + dijmc;
        double *cache = bufL + dijmc;
        double *pbuf;
        int (*fprescreen)();
        if (pbcopt != NULL) {
                fprescreen = pbcopt->fprescreen;
        } else {
                fprescreen = PBCnoscreen;
        }

        shls[0] = ish;
        shls[1] = jsh;
        /* NOTE(review): the neighbor-list lookups below use the absolute ish,
         * jsh - nshi and ksh - nshij as indices.  That looks like it assumes
         * ish0 == 0, jsh0 == nshi and ksh0 == nshi + nshj -- confirm against
         * the code that builds the neighbor list. */
        jsh_off = jsh - nshi;
        NeighborList *nl0 = *neighbor_list;
        NeighborPair *np0_ki, *np0_kj;
        for (m = 0; m < nkshloc; m++) {
                /* shells [msh0, msh1) form one block of k shells */
                msh0 = kshloc[m];
                msh1 = kshloc[m+1];
                dkmax = ao_loc[msh1] - ao_loc[msh0];
                dijm = dij * dkmax;
                dijmc = dijm * comp;
                /* clear the accumulator for this block */
                for (i = 0; i < dijmc; i++) {
                        bufL[i] = 0;
                }

                pbuf = bufL;
                for (ksh = msh0; ksh < msh1; ksh++){
                        shls[2] = ksh;
                        ksh_off = ksh - nshij;
                        dk = ao_loc[ksh+1] - ao_loc[ksh];
                        dijkc = dij*dk * comp;
                        /* image lists for the (ksh, ish) and (ksh, jsh) pairs */
                        np0_ki = (nl0->pairs)[ksh_off*nshi + ish];
                        np0_kj = (nl0->pairs)[ksh_off*nshj + jsh_off];
                        /* integrals are evaluated only if both pairs have images */
                        if (np0_ki->nimgs > 0 && np0_kj->nimgs > 0) {
                                for (idx_i = 0; idx_i < np0_ki->nimgs; idx_i++){
                                        iL = (np0_ki->Ls_list)[idx_i];
                                        /* place the i atom at its image iL in env_loc */
                                        shift_bas(env_loc, env, Ls, iptrxyz, iL);
                                        for (idx_j = 0; idx_j < np0_kj->nimgs; idx_j++){
                                                jL = (np0_kj->Ls_list)[idx_j];
                                                /* place the j atom at its image jL in env_loc */
                                                shift_bas(env_loc, env, Ls, jptrxyz, jL);

                                                if ((*fprescreen)(shls, pbcopt, atm, bas, env_loc)) {
                                                        /* a zero return from intor leaves the accumulator untouched */
                                                        if ((*intor)(buf, NULL, shls, atm, natm, bas, nbas,
                                                                     env_loc, cintopt, cache)) {
                                                                for (i = 0; i < dijkc; i++) {
                                                                        pbuf[i] += buf[i];
                                                                }
                                                        }
                                                }
                                        }

                                }
                        }
                        /* next k shell of the block is stored right after this one */
                        pbuf += dijkc;
                }

                /* scatter the image-summed block for shells [msh0, msh1) into out */
                (*fsort)(out, bufL, shls_slice, ao_loc, comp, ish, jsh, msh0, msh1);
        }
}
ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di * dj; + int dkmax = INTBUFMAX10 / dij / 2 * MIN(IMGBLK,nimgs); + //int kshloc[ksh1-ksh0+1]; + //int nkshloc = shloc_partition(kshloc, ao_loc, ksh0, ksh1, dkmax); + + int i, k, ic; + int ksh, dk, dijk, iL, jL, ksh_off, jsh_off; + int shls[3]; + + int nshi = ish1 - ish0; + int nshj = jsh1 - jsh0; + int nshij = nshi + nshj; + int idx_i, idx_j; + + int dijmc = dij * dkmax * comp; + double *bufL = buf + dijmc; + double *cache = bufL + dijmc; + double *pbuf, *pbufL; + int (*fprescreen)(); + if (pbcopt != NULL) { + fprescreen = pbcopt->fprescreen; + } else { + fprescreen = PBCnoscreen; + } + + shls[0] = ish; + shls[1] = jsh; + jsh_off = jsh - nshi; + NeighborList *nl0 = *neighbor_list; + NeighborPair *np0_ki, *np0_kj; + + int dijc = dij * comp; + for (i = 0; i < dijc; i++) { + bufL[i] = 0; + } + + for (ksh = ksh0; ksh < ksh1; ksh++){ + dk = ao_loc[ksh+1] - ao_loc[ksh]; + assert(dk < dkmax); + dijk = dij * dk; + shls[2] = ksh; + ksh_off = ksh - nshij; + np0_ki = (nl0->pairs)[ksh_off*nshi + ish]; + np0_kj = (nl0->pairs)[ksh_off*nshj + jsh_off]; + if (np0_ki->nimgs > 0 && np0_kj->nimgs > 0) { + for (idx_i = 0; idx_i < np0_ki->nimgs; idx_i++){ + iL = (np0_ki->Ls_list)[idx_i]; + shift_bas(env_loc, env, Ls, iptrxyz, iL); + for (idx_j = 0; idx_j < np0_kj->nimgs; idx_j++){ + jL = (np0_kj->Ls_list)[idx_j]; + shift_bas(env_loc, env, Ls, jptrxyz, jL); + + if ((*fprescreen)(shls, pbcopt, atm, bas, env_loc)) { + if ((*intor)(buf, NULL, shls, atm, natm, bas, nbas, + env_loc, cintopt, cache)) { + for (ic = 0; ic < comp; ic++) { + pbufL = bufL + ic * dij; + pbuf = buf + ic * dijk; + for (k = 0; k < dk; k++) { + for (i = 0; i < dij; i++) { + pbufL[i] += pbuf[i]; + } + pbuf += dij; + } + } + } + } + } + } + } + } + (*fsort)(out, bufL, shls_slice, ao_loc, comp, ish, jsh); +} + +void PBCnr3c_screened_fill_gs1(int (*intor)(), double *out, int nkpts_ij, + int nkpts, int comp, int nimgs, int ish, int 
jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + _nr3c_screened_fill_g(intor, &sort3c_gs1, out, nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); +} + +static void sort3c_gs2_igtj(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh, int msh0, int msh1) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int ksh0 = shls_slice[4]; + const int ksh1 = shls_slice[5]; + const size_t naok = ao_loc[ksh1] - ao_loc[ksh0]; + const size_t off0 = ((size_t)ao_loc[ish0]) * (ao_loc[ish0] + 1) / 2; + const size_t nij = ((size_t)ao_loc[ish1]) * (ao_loc[ish1] + 1) / 2 - off0; + const size_t nijk = nij * naok; + + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di * dj; + const int jp = ao_loc[jsh] - ao_loc[jsh0]; + out += (((size_t)ao_loc[ish])*(ao_loc[ish]+1)/2-off0 + jp) * naok; + + int i, j, k, ij, ksh, ic, dk, dijk; + double *pin, *pout; + + for (ksh = msh0; ksh < msh1; ksh++) { + dk = ao_loc[ksh+1] - ao_loc[ksh]; + dijk = dij * dk; + for (ic = 0; ic < comp; ic++) { + pout = out + nijk * ic + ao_loc[ksh]-ao_loc[ksh0]; + pin = in + dijk * ic; + for (i = 0; i < di; i++) { + for (j = 0; j < dj; j++) { + ij = j * di + i; + for (k = 0; k < dk; k++) { + pout[j*naok+k] = pin[k*dij+ij]; + } + } + pout += (i+ao_loc[ish]+1) * naok; + } + } + in += dijk * comp; + } +} + +void sort2c_gs2_igtj(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const size_t off0 = 
((size_t)ao_loc[ish0]) * (ao_loc[ish0] + 1) / 2; + const size_t nij = ((size_t)ao_loc[ish1]) * (ao_loc[ish1] + 1) / 2 - off0; + + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di * dj; + const int jp = ao_loc[jsh] - ao_loc[jsh0]; + out += ((size_t)ao_loc[ish])*(ao_loc[ish]+1)/2-off0 + jp; + + int i, j, ic; + double *pin, *pout; + + for (ic = 0; ic < comp; ic++) { + pout = out + nij * ic; + pin = in + dij * ic; + for (i = 0; i < di; i++) { + for (j = 0; j < dj; j++) { + pout[j] = pin[j*di+i]; + } + pout += (i+ao_loc[ish]+1); + } + } +} + +static void sort3c_gs2_ieqj(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh, int msh0, int msh1) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int ksh0 = shls_slice[4]; + const int ksh1 = shls_slice[5]; + const size_t naok = ao_loc[ksh1] - ao_loc[ksh0]; + const size_t off0 = ((size_t)ao_loc[ish0]) * (ao_loc[ish0] + 1) / 2; + const size_t nij = ((size_t)ao_loc[ish1]) * (ao_loc[ish1] + 1) / 2 - off0; + const size_t nijk = nij * naok; + + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dij = di * di; + const int jp = ao_loc[jsh] - ao_loc[jsh0]; + out += (((size_t)ao_loc[ish])*(ao_loc[ish]+1)/2-off0 + jp) * naok; + + int i, j, k, ij, ksh, ic, dk, dijk; + double *pin, *pout; + + for (ksh = msh0; ksh < msh1; ksh++) { + dk = ao_loc[ksh+1] - ao_loc[ksh]; + dijk = dij * dk; + for (ic = 0; ic < comp; ic++) { + pout = out + nijk * ic + ao_loc[ksh]-ao_loc[ksh0]; + pin = in + dijk * ic; + for (i = 0; i < di; i++) { + for (j = 0; j <= i; j++) { + ij = j * di + i; + for (k = 0; k < dk; k++) { + pout[j*naok+k] = pin[k*dij+ij]; + } + } + pout += (i+ao_loc[ish]+1) * naok; + } + } + in += dijk * comp; + } +} + +void sort2c_gs2_ieqj(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh) +{ + const int ish0 = shls_slice[0]; + const int ish1 = 
shls_slice[1]; + const int jsh0 = shls_slice[2]; + const size_t off0 = ((size_t)ao_loc[ish0]) * (ao_loc[ish0] + 1) / 2; + const size_t nij = ((size_t)ao_loc[ish1]) * (ao_loc[ish1] + 1) / 2 - off0; + + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dij = di * di; + const int jp = ao_loc[jsh] - ao_loc[jsh0]; + out += ((size_t)ao_loc[ish])*(ao_loc[ish]+1)/2-off0 + jp; + + int i, j, ic; + double *pin, *pout; + + for (ic = 0; ic < comp; ic++) { + pout = out + nij * ic; + pin = in + dij * ic; + for (i = 0; i < di; i++) { + for (j = 0; j <= i; j++) { + pout[j] = pin[j*di+i]; + } + pout += (i+ao_loc[ish]+1); + } + } +} + +void sort2c_gs1(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di * dj; + const int ip = ao_loc[ish] - ao_loc[ish0]; + const int jp = ao_loc[jsh] - ao_loc[jsh0]; + const size_t naoi = ao_loc[ish1] - ao_loc[ish0]; + const size_t naoj = ao_loc[jsh1] - ao_loc[jsh0]; + const size_t nij = naoi * naoj; + out += ip * naoj + jp; + + int i, j, ic; + double *pin, *pout; + + for (ic = 0; ic < comp; ic++) { + pout = out + nij * ic; + pin = in + dij * ic; + for (i = 0; i < di; i++) { + for (j = 0; j < dj; j++) { + pout[j] = pin[j*di+i]; + } + pout += naoj; + } + } +} + +void PBCnr3c_screened_fill_gs2(int (*intor)(), double *out, int nkpts_ij, + int nkpts, int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + int ip = ish + shls_slice[0]; + int jp = jsh + shls_slice[2] - nbas; + if (ip > jp) { + _nr3c_screened_fill_g(intor, 
&sort3c_gs2_igtj, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } else if (ip == jp) { + _nr3c_screened_fill_g(intor, &sort3c_gs2_ieqj, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } +} + +void PBCnr3c_screened_sum_auxbas_fill_gs1(int (*intor)(), double *out, int nkpts_ij, + int nkpts, int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + _nr3c_screened_sum_auxbas_fill_g(intor, &sort2c_gs1, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); +} + +void PBCnr3c_screened_sum_auxbas_fill_gs2(int (*intor)(), double *out, int nkpts_ij, + int nkpts, int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + int ip = ish + shls_slice[0]; + int jp = jsh + shls_slice[2] - nbas; + if (ip > jp) { + _nr3c_screened_sum_auxbas_fill_g(intor, &sort2c_gs2_igtj, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } else if (ip == jp) { + _nr3c_screened_sum_auxbas_fill_g(intor, &sort2c_gs2_ieqj, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, 
ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } +} + +static void contract_3c1e_ipik_dm_gs1(double *grad, double* dm, double *eri, + int *shls, int *ao_loc, int *atm, int natm, + int *bas, int nbas, int comp, int nao) +{ + const int ish = shls[0]; + const int jsh = shls[1]; + const int ksh = shls[2]; + + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di * dj; + const size_t i0 = ao_loc[ish]; + const size_t j0 = ao_loc[jsh] - nao; + + const int ia = bas[ATOM_OF+ish*BAS_SLOTS]; + const int ka = bas[ATOM_OF+ksh*BAS_SLOTS] - 2*natm; + + int i, j, ic; + double *ptr_eri, *ptr_dm; + double *dm0 = dm + (i0 * nao + j0); + double ipi_dm[comp]; + for (ic = 0; ic < comp; ic++) { + ipi_dm[ic] = 0; + ptr_dm = dm0; + ptr_eri = eri + dij * ic; + for (i = 0; i < di; i++) { + for (j = 0; j < dj; j++) { + ipi_dm[ic] += ptr_eri[j*di+i] * ptr_dm[j]; + } + ptr_dm += nao; + } + } + + for (ic = 0; ic < comp; ic++) { + grad[ia*comp+ic] += ipi_dm[ic]; + grad[ka*comp+ic] -= ipi_dm[ic]; + } +} + +static void _nr3c1e_screened_nuc_grad_fill_g(int (*intor)(), void (*fcontract)(), + double *grad, double *dm, int nkpts_ij, int nkpts, + int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nao, + NeighborList** neighbor_list) +{ + const int ish0 = shls_slice[0]; + //const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + //const int jsh1 = shls_slice[3]; + const int ksh0 = shls_slice[4]; + const int ksh1 = shls_slice[5]; + + ish += ish0; + jsh += jsh0; + int iptrxyz = atm[PTR_COORD+bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS]; + int jptrxyz = atm[PTR_COORD+bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS]; + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di 
* dj; + int dkmax = INTBUFMAX10 / dij / 2 * MIN(IMGBLK,nimgs); + //int kshloc[ksh1-ksh0+1]; + //int nkshloc = shloc_partition(kshloc, ao_loc, ksh0, ksh1, dkmax); + + int i, k, ic; + int ksh, dk, dijk, iL, jL, ksh_off, jsh_off; + int shls[3]; + + int idx_i, idx_j; + + int dijc = dij * comp; + int dijmc = dijc * dkmax; + double *bufL = buf + dijmc; + double *cache = bufL + dijc; + double *pbuf, *pbufL; + int (*fprescreen)(); + if (pbcopt != NULL) { + fprescreen = pbcopt->fprescreen; + } else { + fprescreen = PBCnoscreen; + } + + shls[0] = ish; + shls[1] = jsh; + jsh_off = jsh - nbas; + NeighborList *nl0 = *neighbor_list; + NeighborPair *np0_ki, *np0_kj; + + for (ksh = ksh0; ksh < ksh1; ksh++){ + dk = ao_loc[ksh+1] - ao_loc[ksh]; + assert(dk < dkmax); + dijk = dij * dk; + shls[2] = ksh; + ksh_off = ksh - nbas*2; + np0_ki = (nl0->pairs)[ksh_off*nbas + ish]; + np0_kj = (nl0->pairs)[ksh_off*nbas + jsh_off]; + if (np0_ki->nimgs > 0 && np0_kj->nimgs > 0) { + for (i = 0; i < dijc; i++) { + bufL[i] = 0; + } + for (idx_i = 0; idx_i < np0_ki->nimgs; idx_i++){ + iL = (np0_ki->Ls_list)[idx_i]; + shift_bas(env_loc, env, Ls, iptrxyz, iL); + for (idx_j = 0; idx_j < np0_kj->nimgs; idx_j++){ + jL = (np0_kj->Ls_list)[idx_j]; + shift_bas(env_loc, env, Ls, jptrxyz, jL); + + if ((*fprescreen)(shls, pbcopt, atm, bas, env_loc)) { + if ((*intor)(buf, NULL, shls, atm, natm, bas, nbas, + env_loc, cintopt, cache)) + { + for (ic = 0; ic < comp; ic++) { + pbufL = bufL + ic * dij; + pbuf = buf + ic * dijk; + for (k = 0; k < dk; k++) { + for (i = 0; i < dij; i++) { + pbufL[i] += pbuf[i]; + } + pbuf += dij; + } + } + } + } + } + } + (*fcontract)(grad, dm, bufL, shls, ao_loc, atm, natm, bas, nbas, comp, nao); + } + } +} + +void PBCnr3c1e_screened_nuc_grad_fill_gs1(int (*intor)(), double *out, double* dm, + int nkpts_ij, int nkpts, int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + 
CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nao, + NeighborList** neighbor_list) +{ + _nr3c1e_screened_nuc_grad_fill_g(intor, &contract_3c1e_ipik_dm_gs1, out, dm, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, nao, neighbor_list); +} + +void PBCnr3c_screened_drv(int (*intor)(), void (*fill)(), double complex *eri, + int nkpts_ij, int nkpts, int comp, int nimgs, + double *Ls, double complex *expkL, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nenv, + NeighborList** neighbor_list) +{ + assert(neighbor_list != NULL); + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + double *expkL_r = malloc(sizeof(double) * nimgs*nkpts * OF_CMPLX); + double *expkL_i = expkL_r + nimgs*nkpts; + int i; + for (i = 0; i < nimgs*nkpts; i++) { + expkL_r[i] = creal(expkL[i]); + expkL_i[i] = cimag(expkL[i]); + } + + size_t count; + count = (nkpts * OF_CMPLX + nimgs) * INTBUFMAX10 * comp; + count+= nimgs * nkpts * OF_CMPLX; + const int cache_size = GTOmax_cache_size(intor, shls_slice, 3, + atm, natm, bas, nbas, env); + +#pragma omp parallel +{ + int ish, jsh, ij; + double *env_loc = malloc(sizeof(double)*nenv); + NPdcopy(env_loc, env, nenv); + double *buf = malloc(sizeof(double)*(count+cache_size)); +#pragma omp for schedule(dynamic) + for (ij = 0; ij < nish*njsh; ij++) { + ish = ij / njsh; + jsh = ij % njsh; + (*fill)(intor, eri, nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } + free(buf); + free(env_loc); +} + free(expkL_r); +} + +void 
PBCnr3c_screened_sum_auxbas_drv(int (*intor)(), void (*fill)(), double complex *eri, + int nkpts_ij, int nkpts, int comp, int nimgs, + double *Ls, double complex *expkL, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nenv, + NeighborList** neighbor_list) +{ + assert(neighbor_list != NULL); + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + double *expkL_r=NULL, *expkL_i=NULL; + //expkL_r = malloc(sizeof(double) * nimgs*nkpts * OF_CMPLX); + //expkL_i = expkL_r + nimgs*nkpts; + //int i; + //for (i = 0; i < nimgs*nkpts; i++) { + // expkL_r[i] = creal(expkL[i]); + // expkL_i[i] = cimag(expkL[i]); + //} + + size_t count; + count = (nkpts * OF_CMPLX + nimgs) * INTBUFMAX10 * comp; + count+= nimgs * nkpts * OF_CMPLX; + const int cache_size = GTOmax_cache_size(intor, shls_slice, 3, + atm, natm, bas, nbas, env); + +#pragma omp parallel +{ + int ish, jsh, ij; + double *env_loc = malloc(sizeof(double)*nenv); + NPdcopy(env_loc, env, nenv); + double *buf = malloc(sizeof(double)*(count+cache_size)); +#pragma omp for schedule(dynamic) + for (ij = 0; ij < nish*njsh; ij++) { + ish = ij / njsh; + jsh = ij % njsh; + (*fill)(intor, eri, nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } + free(buf); + free(env_loc); +} + //free(expkL_r); +} + +void PBCnr3c1e_screened_nuc_grad_drv(int (*intor)(), void (*fill)(), + double* grad, double* dm, + int nkpts_ij, int nkpts, int comp, int nimgs, + double *Ls, double complex *expkL, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nenv, int nao, + NeighborList** neighbor_list) +{ + 
assert(neighbor_list != NULL); + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + double *expkL_r=NULL, *expkL_i=NULL; + //double *expkL_r = malloc(sizeof(double) * nimgs*nkpts * OF_CMPLX); + //double *expkL_i = expkL_r + nimgs*nkpts; + //int i; + //for (i = 0; i < nimgs*nkpts; i++) { + // expkL_r[i] = creal(expkL[i]); + // expkL_i[i] = cimag(expkL[i]); + //} + + size_t count; + count = (nkpts * OF_CMPLX + nimgs) * INTBUFMAX10 * comp; + count+= nimgs * nkpts * OF_CMPLX; + const int cache_size = GTOmax_cache_size(intor, shls_slice, 3, + atm, natm, bas, nbas, env); + + double *gradbufs[MAX_THREADS]; +#pragma omp parallel +{ + int ish, jsh, ij; + double *env_loc = malloc(sizeof(double)*nenv); + NPdcopy(env_loc, env, nenv); + double *grad_loc; + int thread_id = omp_get_thread_num(); + if (thread_id == 0) { + grad_loc = grad; + } else { + grad_loc = calloc(natm*comp, sizeof(double)); + } + gradbufs[thread_id] = grad_loc; + + double *buf = malloc(sizeof(double)*(count+cache_size)); + #pragma omp for schedule(dynamic) + for (ij = 0; ij < nish*njsh; ij++) { + ish = ij / njsh; + jsh = ij % njsh; + (*fill)(intor, grad_loc, dm, nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, nao, neighbor_list); + } + free(buf); + free(env_loc); + + NPomp_dsum_reduce_inplace(gradbufs, natm*comp); + if (thread_id != 0) { + free(grad_loc); + } +} + //free(expkL_r); +} + + +static int _nr2c_screened_fill( + int (*intor)(), double complex *out, + int nkpts, int comp, int nimgs, int jsh, int ish0, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, + int *shls_slice, int *ao_loc, CINTOpt *cintopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + const int ish1 = 
shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nshi = ish1 - shls_slice[0]; + const int nshj = jsh1 - jsh0; + + const double D1 = 1; + const int I1 = 1; + + ish0 += shls_slice[0]; + jsh += jsh0; + int jsh_off = jsh - nshi; + int jptrxyz = atm[PTR_COORD+bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + int dimax = INTBUFMAX10 / dj; + int ishloc[ish1-ish0+1]; + int nishloc = shloc_partition(ishloc, ao_loc, ish0, ish1, dimax); + + int m, msh0, msh1, dijc, dmjc, ish, di, empty = 1; /* stays 1 until some integral batch is non-zero; read by the final return */ + int jL, idx_j; + int shls[2]; + double *bufk_r = buf; + double *bufk_i, *bufL, *pbufk_r, *pbufk_i, *cache; + + NeighborList *nl0 = *neighbor_list; + NeighborPair *np0; + + shls[1] = jsh; + for (m = 0; m < nishloc; m++) { + msh0 = ishloc[m]; + msh1 = ishloc[m+1]; + dimax = ao_loc[msh1] - ao_loc[msh0]; + dmjc = dj * dimax * comp; + bufk_i = bufk_r + dmjc * nkpts; + bufL = bufk_i + dmjc * nkpts; + cache = bufL + dmjc; + + memset(bufk_r, 0, 2*dmjc*nkpts*sizeof(double)); + pbufk_r = bufk_r; + pbufk_i = bufk_i; + for (ish = msh0; ish < msh1; ish++) { + shls[0] = ish; + di = ao_loc[ish+1] - ao_loc[ish]; + dijc = di * dj * comp; + np0 = (nl0->pairs)[ish*nshj + jsh_off]; + if (np0->nimgs > 0) { + for (idx_j = 0; idx_j < np0->nimgs; idx_j++){ + jL = (np0->Ls_list)[idx_j]; + shift_bas(env_loc, env, Ls, jptrxyz, jL); + if ((*intor)(bufL, NULL, shls, atm, natm, bas, nbas, + env_loc, cintopt, cache)) { + empty = 0; + dger_(&dijc, &nkpts, &D1, bufL, &I1, + expkL_r+jL, &nimgs, pbufk_r, &dmjc); + dger_(&dijc, &nkpts, &D1, bufL, &I1, + expkL_i+jL, &nimgs, pbufk_i, &dmjc); + } + } + } + pbufk_r += dijc; + pbufk_i += dijc; + } + sort2c_ks1(out, bufk_r, bufk_i, shls_slice, ao_loc, + nkpts, comp, jsh, msh0, msh1); + } + return !empty; +} + +void PBCnr2c_screened_fill_ks1(int (*intor)(), double complex *out, + int nkpts, int comp, int nimgs, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, +
int *shls_slice, int *ao_loc, CINTOpt *cintopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + _nr2c_screened_fill(intor, out, nkpts, comp, nimgs, jsh, 0, + buf, env_loc, Ls, expkL_r, expkL_i, shls_slice, ao_loc, + cintopt, atm, natm, bas, nbas, env, neighbor_list); +} + +void PBCnr2c_screened_fill_ks2(int (*intor)(), double complex *out, + int nkpts, int comp, int nimgs, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, + int *shls_slice, int *ao_loc, CINTOpt *cintopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + _nr2c_screened_fill(intor, out, nkpts, comp, nimgs, jsh, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, shls_slice, ao_loc, + cintopt, atm, natm, bas, nbas, env, neighbor_list); +} + +void PBCnr2c_screened_drv(int (*intor)(), void (*fill)(), double complex *out, + int nkpts, int comp, int nimgs, + double *Ls, double complex *expkL, + int *shls_slice, int *ao_loc, CINTOpt *cintopt, + int *atm, int natm, int *bas, int nbas, double *env, int nenv, + NeighborList** neighbor_list) +{ + assert(neighbor_list != NULL); + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int njsh = jsh1 - jsh0; + double *expkL_r = malloc(sizeof(double) * nimgs*nkpts * OF_CMPLX); + double *expkL_i = expkL_r + nimgs*nkpts; + int i; + for (i = 0; i < nimgs*nkpts; i++) { + expkL_r[i] = creal(expkL[i]); + expkL_i[i] = cimag(expkL[i]); + } + const int cache_size = GTOmax_cache_size(intor, shls_slice, 2, + atm, natm, bas, nbas, env); + +#pragma omp parallel +{ + int jsh; + double *env_loc = malloc(sizeof(double)*nenv); + NPdcopy(env_loc, env, nenv); + size_t count = (nkpts+1) * OF_CMPLX; + double *buf = malloc(sizeof(double)*(count*INTBUFMAX10*comp+cache_size)); +#pragma omp for schedule(dynamic) + for (jsh = 0; jsh < njsh; jsh++) { + (*fill)(intor, out, nkpts, comp, nimgs, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, + shls_slice, ao_loc, 
cintopt, atm, natm, bas, nbas, env, + neighbor_list); + } + free(buf); + free(env_loc); +} + free(expkL_r); +} diff --git a/pyscf/lib/pbc/hf_grad.c b/pyscf/lib/pbc/hf_grad.c new file mode 100644 index 0000000000..7c781fba19 --- /dev/null +++ b/pyscf/lib/pbc/hf_grad.c @@ -0,0 +1,95 @@ +/* Copyright 2021-2024 The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include "config.h" +#include "vhf/fblas.h" +#include "np_helper/np_helper.h" +#include "pbc/neighbor_list.h" + +#define MAX_THREADS 256 + +void contract_vhf_dm(double* out, double* vhf, double* dm, + NeighborList** neighbor_list, + int* shls_slice, int* ao_loc, int* shls_atm, + int comp, int natm, int nbas) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + const size_t nijsh = (size_t)nish * njsh; + const size_t naoi = ao_loc[ish1] - ao_loc[ish0]; + const size_t naoj = ao_loc[jsh1] - ao_loc[jsh0]; + + const int I1 = 1; + double *out_bufs[MAX_THREADS]; + +#pragma omp parallel +{ + size_t ij, ish, jsh, p0, q0; + int ni, nj, i, ic, iatm, nimgs=1; + NeighborList *nl0=NULL; + if (neighbor_list != NULL) { + nl0 = *neighbor_list; + } + double *pvhf, *pdm; + + int thread_id = omp_get_thread_num(); + double *buf; + if (thread_id == 0) { + buf = out; + } else { + buf = calloc(comp*natm, 
sizeof(double)); + } + out_bufs[thread_id] = buf; + + #pragma omp for schedule(dynamic) + for (ij = 0; ij < nijsh; ij++) { + ish = ij / njsh + ish0; + jsh = ij % njsh + jsh0; + + if (nl0 != NULL) { + nimgs = ((nl0->pairs)[ish*nbas + jsh])->nimgs; + } + if (nimgs > 0) { // this shell pair has contribution + p0 = ao_loc[ish] - ao_loc[ish0]; + q0 = ao_loc[jsh] - ao_loc[jsh0]; + ni = ao_loc[ish+1] - ao_loc[ish]; + nj = ao_loc[jsh+1] - ao_loc[jsh]; + + iatm = shls_atm[ish]; + pvhf = vhf + (p0 * naoj + q0); + pdm = dm + (p0 * naoj + q0); + for (ic = 0; ic < comp; ic++) { + for (i = 0; i < ni; i++) { + buf[iatm*3+ic] += ddot_(&nj, pvhf+i*naoj, &I1, pdm+i*naoj, &I1); + } + pvhf += naoi * naoj; + } + } + } + + NPomp_dsum_reduce_inplace(out_bufs, comp*natm); + if (thread_id != 0) { + free(buf); + } +} +} diff --git a/pyscf/lib/pbc/neighbor_list.c b/pyscf/lib/pbc/neighbor_list.c new file mode 100644 index 0000000000..26fb52fd37 --- /dev/null +++ b/pyscf/lib/pbc/neighbor_list.c @@ -0,0 +1,206 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#include +#include +#include "config.h" +#include "cint.h" +#include "pbc/neighbor_list.h" + +#define SQUARE(r) (r[0]*r[0]+r[1]*r[1]+r[2]*r[2]) + +void init_neighbor_pair(NeighborPair** np, int nimgs, int* Ls_list) +{ + NeighborPair *np0 = (NeighborPair*) malloc(sizeof(NeighborPair)); + np0->nimgs = nimgs; + np0->q_cond = NULL; + np0->center = NULL; + if (nimgs > 0){ + np0->Ls_list = (int*) malloc(sizeof(int)*nimgs); + int i; + for (i=0; iLs_list[i] = Ls_list[i]; + } + } + else { + np0->Ls_list = NULL; + } + *np = np0; +} + +void del_neighbor_pair(NeighborPair** np) +{ + NeighborPair *np0 = *np; + if (!np0) { + return; + } + if (np0->Ls_list) { + free(np0->Ls_list); + } + if (np0->q_cond) { + free(np0->q_cond); + } + if (np0->center) { + free(np0->center); + } + free(np0); + *np = NULL; +} + +void init_neighbor_list(NeighborList** nl, int nish, int njsh, int nimgs) +{ + NeighborList *nl0 = (NeighborList*) malloc(sizeof(NeighborList)); + nl0->nish = nish; + nl0->njsh = njsh; + nl0->nimgs = nimgs; + nl0->pairs = (NeighborPair**) malloc(sizeof(NeighborPair*)*nish*njsh); + int ish, jsh; + for (ish=0; ishpairs)[ish*njsh+jsh] = NULL; + } + *nl = nl0; +} + +void build_neighbor_list(NeighborList** nl, + int* ish_atm, int* ish_bas, double* ish_env, double* ish_rcut, + int* jsh_atm, int* jsh_bas, double* jsh_env, double* jsh_rcut, + int nish, int njsh, double* Ls, int nimgs, int hermi) +{ + init_neighbor_list(nl, nish, njsh, nimgs); + NeighborList* nl0 = *nl; + +#pragma omp parallel +{ + int *buf = (int*) malloc(sizeof(int)*nimgs); + int ish, jsh, iL, nL; + int ish_atm_id, jsh_atm_id; + double ish_radius, jsh_radius, rmax, dij; + double *ish_ratm, *jsh_ratm, *rL; + double rij[3]; + NeighborPair **np = NULL; +#pragma omp for schedule(dynamic) + for (ish=0; ishpairs + ish*njsh+jsh; + init_neighbor_pair(np, nL, buf); + } + } + free(buf); +} +} + +void del_neighbor_list(NeighborList** nl) +{ + NeighborList *nl0 = *nl; + if (!nl0) { + return; 
+ } + int ish, jsh; + int nish = nl0->nish; + int njsh = nl0->njsh; + if (nl0->pairs) { + for (ish=0; ishpairs + ish*njsh+jsh); + } + } + free(nl0->pairs); + } + free(nl0); + *nl = NULL; +} + + +int NLOpt_noscreen(int* shls, NeighborListOpt* opt) +{ + return 1; +} + +int NLOpt_screen(int* shls, NeighborListOpt* opt) +{ + int ish = shls[0]; + int jsh = shls[1]; + NeighborList *nl = opt->nl; + int njsh = nl->njsh; + NeighborPair *np; + np = (nl->pairs)[ish*njsh + jsh]; + return np->nimgs > 0; +} + +void NLOpt_init(NeighborListOpt **opt) +{ + NeighborListOpt *opt0 = malloc(sizeof(NeighborListOpt)); + opt0->nl = NULL; + opt0->fprescreen = &NLOpt_noscreen; + *opt = opt0; +} + +void NLOpt_del(NeighborListOpt **opt) +{ + NeighborListOpt *opt0 = *opt; + if (!opt0) { + return; + } + free(opt0); + *opt = NULL; +} + +void NLOpt_set_nl(NeighborListOpt *opt, NeighborList *nl) +{ + opt->nl = nl; +} + +void NLOpt_reset(NeighborListOpt *opt) +{ + opt->nl = NULL; + opt->fprescreen = &NLOpt_screen; +} + +void NLOpt_set_optimizer(NeighborListOpt *opt) +{ + opt->fprescreen = &NLOpt_screen; +} + +void NLOpt_del_optimizer(NeighborListOpt *opt) +{ + opt->fprescreen = &NLOpt_noscreen; +} + diff --git a/pyscf/lib/pbc/neighbor_list.h b/pyscf/lib/pbc/neighbor_list.h new file mode 100644 index 0000000000..3364be1f3d --- /dev/null +++ b/pyscf/lib/pbc/neighbor_list.h @@ -0,0 +1,41 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#ifndef HAVE_DEFINED_NEIGHBOR_LIST_H +#define HAVE_DEFINED_NEIGHBOR_LIST_H +typedef struct NeighborPair_struct { + int nimgs; + int *Ls_list; + double *q_cond; + double *center; +} NeighborPair; + +typedef struct NeighborList_struct { + int nish; + int njsh; + int nimgs; + NeighborPair **pairs; +} NeighborList; + +typedef struct NeighborListOpt_struct { + NeighborList *nl; + int (*fprescreen)(int *shls, struct NeighborListOpt_struct *opt); +} NeighborListOpt; + +int NLOpt_noscreen(int* shls, NeighborListOpt* opt); +#endif diff --git a/pyscf/lib/pbc/optimizer.c b/pyscf/lib/pbc/optimizer.c index d30c81c3e8..a37494ca0a 100644 --- a/pyscf/lib/pbc/optimizer.c +++ b/pyscf/lib/pbc/optimizer.c @@ -17,6 +17,7 @@ */ #include <stdlib.h> +#include <math.h> #include "cint.h" #include "pbc/optimizer.h" @@ -27,6 +28,7 @@ void PBCinit_optimizer(PBCOpt **opt, int *atm, int natm, { PBCOpt *opt0 = malloc(sizeof(PBCOpt)); opt0->rrcut = NULL; + opt0->rcut = NULL; opt0->fprescreen = &PBCnoscreen; *opt = opt0; } @@ -41,11 +43,13 @@ void PBCdel_optimizer(PBCOpt **opt) if (opt0->rrcut != NULL) { free(opt0->rrcut); } + if (opt0->rcut != NULL) { /* allocated by PBCset_rcut_cond_loose; release it when set */ + free(opt0->rcut); + } free(opt0); *opt = NULL; } - int PBCnoscreen(int *shls, PBCOpt *opt, int *atm, int *bas, double *env) { return 1; @@ -68,6 +72,23 @@ int PBCrcut_screen(int *shls, PBCOpt *opt, int *atm, int *bas, double *env) return (rr < opt->rrcut[ish] || rr < opt->rrcut[jsh]); } +int PBCrcut_screen_loose(int *shls, PBCOpt *opt, int *atm, int *bas, double *env) +{ + if (opt == NULL) { + return 1; // no screen + } + const int ish = shls[0]; + const int jsh = shls[1]; + const double *ri = env + atm[bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS+PTR_COORD]; + const double *rj = env + atm[bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS+PTR_COORD]; + double rirj[3]; + rirj[0] = ri[0] - rj[0]; + rirj[1] = ri[1] - rj[1]; + rirj[2] = ri[2] - rj[2]; + double r = sqrt(SQUARE(rirj)); + return r < opt->rcut[ish] + opt->rcut[jsh]; +} + void PBCset_rcut_cond(PBCOpt
*opt, double *rcut, int *atm, int natm, int *bas, int nbas, double *env) { @@ -82,3 +103,18 @@ void PBCset_rcut_cond(PBCOpt *opt, double *rcut, opt->rrcut[i] = rcut[i] * rcut[i]; } } + +void PBCset_rcut_cond_loose(PBCOpt *opt, double *rcut, + int *atm, int natm, int *bas, int nbas, double *env) +{ + if (opt->rcut != NULL) { + free(opt->rcut); + } + opt->rcut = (double *)malloc(sizeof(double) * nbas); + opt->fprescreen = &PBCrcut_screen_loose; + + int i; + for (i = 0; i < nbas; i++) { + opt->rcut[i] = rcut[i]; + } +} diff --git a/pyscf/lib/pbc/optimizer.h b/pyscf/lib/pbc/optimizer.h index ff3299715b..62c8be5d32 100644 --- a/pyscf/lib/pbc/optimizer.h +++ b/pyscf/lib/pbc/optimizer.h @@ -16,10 +16,11 @@ * Author: Qiming Sun */ -#if !defined(HAVE_DEFINED_CVHFOPT_H) -#define HAVE_DEFINED_CVHFOPT_H +#if !defined(HAVE_DEFINED_PBCOPT_H) +#define HAVE_DEFINED_PBCOPT_H typedef struct PBCOpt_struct { double *rrcut; + double *rcut; int (*fprescreen)(int *shls, struct PBCOpt_struct *opt, int *atm, int *bas, double *env); } PBCOpt; @@ -27,4 +28,3 @@ typedef struct PBCOpt_struct { int PBCnoscreen(int *shls, PBCOpt *opt, int *atm, int *bas, double *env); int PBCrcut_screen(int *shls, PBCOpt *opt, int *atm, int *bas, double *env); - diff --git a/pyscf/lib/pbc/pp.c b/pyscf/lib/pbc/pp.c new file mode 100644 index 0000000000..4885080544 --- /dev/null +++ b/pyscf/lib/pbc/pp.c @@ -0,0 +1,448 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include +#include "config.h" +#include "cint.h" +#include "gto/gto.h" +#include "vhf/fblas.h" +#include "np_helper/np_helper.h" +#include "pbc/fill_ints.h" +#include "pbc/neighbor_list.h" + +#define HL_TABLE_SLOTS 7 +//#define ATOM_OF 0 +//#define ANG_OF 1 +#define HL_DIM_OF 2 +#define HL_DATA_OF 3 +#define HL_OFFSET0 4 +#define HF_OFFSET1 5 +#define HF_OFFSET2 6 +#define MAX_THREADS 256 + + +static void _ppnl_fill_g(void (*fsort)(), double* out, double** ints, + int comp, int ish, int jsh, double* buf, + int *shls_slice, int *ao_loc, + int* hl_table, double* hl_data, int nhl, + NeighborListOpt* nlopt) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + + ish += ish0; + jsh += jsh0; + + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di *dj; + const int ioff = ao_loc[ish] - ao_loc[ish0]; + const int joff = ao_loc[jsh] - ao_loc[jsh0]; + const int naoi = ao_loc[ish1] - ao_loc[ish0]; + const int naoj = ao_loc[jsh1] - ao_loc[jsh0]; + + int i, j, ij, pi, pj, ksh; + int hl_dim, nd; + int shls_ki[2], shls_kj[2]; + int *table, *offset; + double *hl; + for (ij = 0; ij < dij; ij++) { + buf[ij] = 0; + } + + int (*fprescreen)(); + if (nlopt != NULL) { + fprescreen = nlopt->fprescreen; + } else { + fprescreen = NLOpt_noscreen; + } + + const char TRANS_N = 'N'; + const char TRANS_T = 'T'; + const double D1 = 1.; + for (ksh = 0; ksh < nhl; ksh++) { + shls_ki[0] = ksh; + shls_ki[1] = ish; + shls_kj[0] = ksh; + shls_kj[1] = jsh; + if ((*fprescreen)(shls_ki, nlopt) && (*fprescreen)(shls_kj, nlopt)) { + table = hl_table + ksh * HL_TABLE_SLOTS; + hl_dim = table[HL_DIM_OF]; + nd = table[ANG_OF] * 2 + 1; + offset = table + HL_OFFSET0; + hl = hl_data + table[HL_DATA_OF]; + for (i=0; i jp) { + _ppnl_fill_g(&sort2c_gs2_igtj, out, ints, comp, ish, jsh, buf, + shls_slice, 
ao_loc, hl_table, hl_data, nhl, nlopt); + } else if (ip == jp) { + _ppnl_fill_g(&sort2c_gs2_ieqj, out, ints, comp, ish, jsh, buf, + shls_slice, ao_loc, hl_table, hl_data, nhl, nlopt); + } +} + + +void contract_ppnl(void (*fill)(), double* out, + double* ppnl_half0, double* ppnl_half1, double* ppnl_half2, + int comp, int* shls_slice, int *ao_loc, + int* hl_table, double* hl_data, int nhl, + NeighborListOpt* nlopt) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + const size_t nijsh = (size_t) nish * njsh; + + double *ints[3] = {ppnl_half0, ppnl_half1, ppnl_half2}; + + int di = GTOmax_shell_dim(ao_loc, shls_slice+0, 1); + int dj = GTOmax_shell_dim(ao_loc, shls_slice+2, 1); + size_t buf_size = di*dj*comp; + + #pragma omp parallel + { + int ish, jsh; + size_t ij; + double *buf = (double*) malloc(sizeof(double) * buf_size); + #pragma omp for schedule(dynamic) + for (ij = 0; ij < nijsh; ij++) { + ish = ij / njsh; + jsh = ij % njsh; + (*fill)(out, ints, comp, ish, jsh, buf, + shls_slice, ao_loc, hl_table, hl_data, nhl, nlopt); + } + free(buf); + } +} + + +void contract_ppnl_ip1(double* out, int comp, + double* ppnl_half0, double* ppnl_half1, double* ppnl_half2, + double* ppnl_half_ip2_0, double* ppnl_half_ip2_1, double* ppnl_half_ip2_2, + int* hl_table, double* hl_data, int nhl, int nao, int* naux, + int* aux_id) +{ + const int One = 1; + const char TRANS_N = 'N'; + //const char TRANS_T = 'T'; + const double D1 = 1.; + const double D0 = 0.; + + size_t nao_pair = (size_t) nao * nao; + memset(out, 0, nao_pair*comp*sizeof(double)); + + size_t n2[3]; + n2[0] = (size_t) nao * naux[0]; + n2[1] = (size_t) nao * naux[1]; + n2[2] = (size_t) nao * naux[2]; + size_t buf_size = 54 * (size_t) nao + 27; + +#pragma omp parallel +{ + size_t ib, id, i, p, ic; + double *pout; + double *buf = (double*) malloc(sizeof(double)*buf_size); + + 
#pragma omp for schedule(dynamic) + for (p = 0; p < nao; p++){ + pout = out + (size_t)p*nao; + for (id = 0; id < nhl; id++) { + ib = aux_id[id]; + int *table = hl_table + ib * HL_TABLE_SLOTS; + int hl_dim = table[HL_DIM_OF]; + int ptr = table[HL_DATA_OF]; + int nd = table[ANG_OF] * 2 + 1; + int *offset = table + HL_OFFSET0; + double *hl = hl_data + ptr; + int lp_dim = nd * nao; + int ilp_dim = hl_dim * lp_dim; + int il_dim = hl_dim * nd; + + double *ilp = buf; + double *ilp_ip2 = ilp + ilp_dim; + double *hilp = ilp_ip2 + nd*3; + for (ic = 0; ic < comp; ic++) { + for (i=0; ifprescreen; + } else { + fprescreen = NLOpt_noscreen; + } + + const char TRANS_N = 'N'; + const char TRANS_T = 'T'; + const double D1 = 1.; + + int i, j, pi, pj, ksh, ic; + int katm, l, hl_dim, nd; + int shls_ki[2], shls_kj[2]; + int *table, *offset; + double *hl; + for (ksh = 0; ksh < nhl; ksh++) { + shls_ki[0] = ksh; + shls_ki[1] = ish; + shls_kj[0] = ksh; + shls_kj[1] = jsh; + if ((*fprescreen)(shls_ki, nlopt) && (*fprescreen)(shls_kj, nlopt)) { + table = hl_table + ksh * HL_TABLE_SLOTS; + katm = table[ATOM_OF]; + l = table[ANG_OF]; + hl_dim = table[HL_DIM_OF]; + nd = 2 * l + 1; + offset = table + HL_OFFSET0; + hl = hl_data + table[HL_DATA_OF]; + + memset(buf, 0, dijm*sizeof(double)); + for (ic = 0; ic < comp; ic++) { + for (i=0; i 0) { + if (ig == G0idx) { + vlocG = -2. 
* M_PI * Z[ia] * r0*r0; + } + else { + vlocG = Z[ia] * coulG[ig] * exp(-0.5*r0*r0 * G2[ig]); + } + } + else { // Z/r + vlocG = Z[ia] * coulG[ig]; + } + out[ig] -= (vlocG * cos(RG)) - (vlocG * sin(RG)) * _Complex_I; + } + } +} +} diff --git a/pyscf/lib/test/test_numint_uniform_grid.py b/pyscf/lib/test/test_numint_uniform_grid.py index 296dcbd61a..05e5664ab0 100644 --- a/pyscf/lib/test/test_numint_uniform_grid.py +++ b/pyscf/lib/test/test_numint_uniform_grid.py @@ -7,7 +7,7 @@ from pyscf.pbc.dft import gen_grid from pyscf.pbc.dft import multigrid -from pyscf.pbc.dft.multigrid import eval_mat, eval_rho +from pyscf.pbc.dft.multigrid.multigrid import eval_mat, eval_rho def uncontract(cell): pcell, contr_coeff = cell.to_uncontracted_cartesian_basis() @@ -18,8 +18,8 @@ def setUpModule(): global bak_EXPDROP, bak_EXTRA_PREC global vxc, kpts, nkpts, nao, dm, dm_kpts, grids_orth, grids_north global ao_kpts_orth, ao_kpts_north, ao_orth, ao_north, ao_gamma_orth, ao_gamma_north - multigrid.EXPDROP, bak_EXPDROP = 1e-14, multigrid.EXPDROP - multigrid.EXTRA_PREC, bak_EXTRA_PREC = 1e-3, multigrid.EXTRA_PREC + multigrid.multigrid.EXPDROP, bak_EXPDROP = 1e-14, multigrid.multigrid.EXPDROP + multigrid.multigrid.EXTRA_PREC, bak_EXTRA_PREC = 1e-3, multigrid.multigrid.EXTRA_PREC numpy.random.seed(2) cell_orth = gto.M(atom='H1 1 1 0; H2 0 0 1', diff --git a/pyscf/pbc/df/incore.py b/pyscf/pbc/df/incore.py index 253250a405..76c23f8e3e 100644 --- a/pyscf/pbc/df/incore.py +++ b/pyscf/pbc/df/incore.py @@ -30,6 +30,7 @@ from pyscf.pbc.tools import k2gamma from pyscf.pbc.tools import pbc as pbctools from pyscf import __config__ +from pyscf.pbc.gto import _pbcintor RCUT_THRESHOLD = getattr(__config__, 'pbc_scf_rsjk_rcut_threshold', 2.5) KECUT_THRESHOLD = getattr(__config__, 'pbc_scf_rsjk_kecut_threshold', 10.0) @@ -471,3 +472,246 @@ def _conc_locs(ao_loc1, ao_loc2): basis accordingly.''' comp_loc = np.append(ao_loc1[:-1], ao_loc1[-1] + ao_loc2) return np.asarray(comp_loc, dtype=np.int32) + +# The 
following functions use pre-constructed shell pair list +def aux_e2_sum_auxbas(cell, auxcell_or_auxbasis, intor='int3c2e', aosym='s1', comp=None, + kptij_lst=np.zeros((1,2,3)), shls_slice=None, **kwargs): + r'''Compute :math:`\sum_{L} (ij|L)` on the fly. + + Returns: + out : (nao_pair,) array + ''' + if isinstance(auxcell_or_auxbasis, gto.MoleBase): + auxcell = auxcell_or_auxbasis + else: + assert isinstance(auxcell_or_auxbasis, str) + auxcell = make_auxcell(cell, auxcell_or_auxbasis) + + int3c = wrap_int3c_sum_auxbas(cell, auxcell, intor, aosym, comp, kptij_lst, **kwargs) + out = int3c(shls_slice) + return out + +def wrap_int3c_sum_auxbas(cell, auxcell, intor='int3c2e', aosym='s1', comp=None, + kptij_lst=np.zeros((1,2,3)), cintopt=None, pbcopt=None, + neighbor_list=None): + if neighbor_list is None: + raise KeyError('Neighbor list is not initialized.') + + log = logger.new_logger(cell) + + nkptij = len(kptij_lst) + kpti = kptij_lst[:,0] + kptj = kptij_lst[:,1] + j_only = is_zero(kpti - kptj) + if j_only: + kpts = kpti + nkpts = len(kpts) + kptij_idx = np.arange(nkpts, dtype=np.int32) + else: + raise NotImplementedError + + intor = cell._add_suffix(intor) + intor, comp = gto.moleintor._get_intor_and_comp(intor, comp) + + pcell = cell.copy() + pcell._atm, pcell._bas, pcell._env = \ + atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env, + cell._atm, cell._bas, cell._env) + ao_loc = gto.moleintor.make_loc(bas, intor) + aux_loc = auxcell.ao_loc_nr() + ao_loc = np.asarray(np.hstack([ao_loc, ao_loc[-1]+aux_loc[1:]]), + dtype=np.int32) + atm, bas, env = gto.conc_env(atm, bas, env, + auxcell._atm, auxcell._bas, auxcell._env) + + Ls = cell.get_lattice_Ls() + nimgs = len(Ls) + nbas = cell.nbas + + gamma_point_only = is_zero(kpts) + if gamma_point_only: + assert nkpts == 1 + kk_type = 'g' + expkL = np.ones(1, dtype=np.complex128) + out_dtype = np.double + else: + raise NotImplementedError + + fill = 'PBCnr3c_screened_sum_auxbas_fill_%s%s' % (kk_type, aosym[:2]) + drv 
= libpbc.PBCnr3c_screened_sum_auxbas_drv + + if cintopt is None: + if nbas > 0: + env[gto.PTR_EXPCUTOFF] = abs(np.log(cell.precision)) + cintopt = _vhf.make_cintopt(atm, bas, env, intor) + else: + cintopt = lib.c_null_ptr() + if intor[:3] != 'ECP': + libpbc.CINTdel_pairdata_optimizer(cintopt) + if pbcopt is None: + pbcopt = _pbcintor.PBCOpt(pcell).init_rcut_cond(pcell) + if isinstance(pbcopt, _pbcintor.PBCOpt): + cpbcopt = pbcopt._this + else: + cpbcopt = lib.c_null_ptr() + + def int3c(shls_slice=None, out=None): + t0 = (logger.process_clock(), logger.perf_counter()) + if shls_slice is None: + shls_slice = (0, nbas, 0, nbas, 0, auxcell.nbas) + shls_slice = (shls_slice[0], shls_slice[1], + nbas+shls_slice[2], nbas+shls_slice[3], + nbas*2+shls_slice[4], nbas*2+shls_slice[5]) + ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]] + nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]] + + if aosym[:2] == 's2': + assert ni == nj + nao_pair = (ao_loc[shls_slice[1]]*(ao_loc[shls_slice[1]]+1)//2 - + ao_loc[shls_slice[0]]*(ao_loc[shls_slice[0]]+1)//2) + else: + nao_pair = ni * nj + + if out is None: + out = np.empty((nkptij,comp,nao_pair), dtype=out_dtype) + + drv(getattr(libpbc, intor), getattr(libpbc, fill), + out.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nkptij), ctypes.c_int(nkpts), + ctypes.c_int(comp), ctypes.c_int(nimgs), + Ls.ctypes.data_as(ctypes.c_void_p), + expkL.ctypes.data_as(ctypes.c_void_p), + kptij_idx.ctypes.data_as(ctypes.c_void_p), + (ctypes.c_int*6)(*shls_slice), + ao_loc.ctypes.data_as(ctypes.c_void_p), cintopt, cpbcopt, + atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(cell.natm), + bas.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nbas), # need to pass cell.nbas to libpbc.PBCnr3c_drv + env.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(env.size), + ctypes.byref(neighbor_list)) + + log.timer_debug1(f'pbc integral {intor}', *t0) + + if comp == 1: + out = out[:,0] + if nkptij == 1: + out = out[0] + return out + + return int3c + +def 
int3c1e_nuc_grad(cell, auxcell, dm, intor='int3c1e', aosym='s1', comp=3, + kptij_lst=np.zeros((1,2,3)), shls_slice=None, **kwargs): + '''Compute the nuclear gradient contribution + to the 2nd local part of PP on the fly. + See `pbc.gto.pseudo.pp_int.vpploc_part2_nuc_grad`. + + Returns: + out : (natm,comp) array + ''' + if comp != 3: + raise NotImplementedError + if aosym != 's1': + raise NotImplementedError + + int3c = wrap_int3c1e_nuc_grad(cell, auxcell, dm, intor, aosym, comp, kptij_lst, **kwargs) + out = int3c(shls_slice) + return out + +def wrap_int3c1e_nuc_grad(cell, auxcell, dm, intor='int3c1e', aosym='s1', comp=3, + kptij_lst=np.zeros((1,2,3)), cintopt=None, pbcopt=None, + neighbor_list=None): + if neighbor_list is None: + raise KeyError('Neighbor list is not initialized.') + + log = logger.new_logger(cell) + + nkptij = len(kptij_lst) + kpti = kptij_lst[:,0] + kptj = kptij_lst[:,1] + j_only = is_zero(kpti - kptj) + if j_only: + kpts = kpti + nkpts = len(kpts) + kptij_idx = np.arange(nkpts, dtype=np.int32) + else: + raise NotImplementedError + + intor = cell._add_suffix(intor) + intor, comp = gto.moleintor._get_intor_and_comp(intor, comp) + + pcell = cell.copy() + pcell._atm, pcell._bas, pcell._env = \ + atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env, + cell._atm, cell._bas, cell._env) + ao_loc = gto.moleintor.make_loc(bas, intor) + aux_loc = auxcell.ao_loc_nr() + ao_loc = np.asarray(np.hstack([ao_loc, ao_loc[-1]+aux_loc[1:]]), + dtype=np.int32) + atm, bas, env = gto.conc_env(atm, bas, env, + auxcell._atm, auxcell._bas, auxcell._env) + + Ls = cell.get_lattice_Ls() + nimgs = len(Ls) + nbas = cell.nbas + + gamma_point_only = is_zero(kpts) + if gamma_point_only: + assert nkpts == 1 + kk_type = 'g' + expkL = np.ones(1, dtype=np.complex128) + dm = np.asarray(dm, order="C", dtype=np.double) + else: + raise NotImplementedError + + fill = 'PBCnr3c1e_screened_nuc_grad_fill_%s%s' % (kk_type, aosym[:2]) + drv = libpbc.PBCnr3c1e_screened_nuc_grad_drv + + if 
cintopt is None: + if nbas > 0: + env[gto.PTR_EXPCUTOFF] = abs(np.log(cell.precision)) + cintopt = _vhf.make_cintopt(atm, bas, env, intor) + else: + cintopt = lib.c_null_ptr() + if intor[:3] != 'ECP': + libpbc.CINTdel_pairdata_optimizer(cintopt) + if pbcopt is None: + pbcopt = _pbcintor.PBCOpt(pcell).init_rcut_cond(pcell) + if isinstance(pbcopt, _pbcintor.PBCOpt): + cpbcopt = pbcopt._this + else: + cpbcopt = lib.c_null_ptr() + + def int3c(shls_slice=None, out=None): + t0 = (logger.process_clock(), logger.perf_counter()) + if shls_slice is None: + shls_slice = (0, nbas, 0, nbas, 0, auxcell.nbas) + shls_slice = (shls_slice[0], shls_slice[1], + nbas+shls_slice[2], nbas+shls_slice[3], + nbas*2+shls_slice[4], nbas*2+shls_slice[5]) + + if out is None: + out = np.zeros((nkptij,cell.natm,comp), dtype=np.double) + + drv(getattr(libpbc, intor), getattr(libpbc, fill), + out.ctypes.data_as(ctypes.c_void_p), + dm.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nkptij), ctypes.c_int(nkpts), + ctypes.c_int(comp), ctypes.c_int(nimgs), + Ls.ctypes.data_as(ctypes.c_void_p), + expkL.ctypes.data_as(ctypes.c_void_p), + kptij_idx.ctypes.data_as(ctypes.c_void_p), + (ctypes.c_int*6)(*shls_slice), + ao_loc.ctypes.data_as(ctypes.c_void_p), cintopt, cpbcopt, + atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(cell.natm), + bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(nbas), + env.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(env.size), + ctypes.c_int(cell.nao), ctypes.byref(neighbor_list)) + + log.timer_debug1(f'pbc integral {intor}', *t0) + + if nkptij == 1: + out = out[0] + return out + + return int3c diff --git a/pyscf/pbc/dft/gks.py b/pyscf/pbc/dft/gks.py index 8d496bbfb1..5536b53daa 100644 --- a/pyscf/pbc/dft/gks.py +++ b/pyscf/pbc/dft/gks.py @@ -77,7 +77,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, ni = ks._numint n, exc, vxc = ni.get_vxc(cell, ks.grids, ks.xc, dm, hermi=hermi, kpt=kpt, kpts_band=kpts_band, max_memory=max_memory) - logger.debug(ks, 
'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) if not hybrid: diff --git a/pyscf/pbc/dft/kgks.py b/pyscf/pbc/dft/kgks.py index f43a8ee04c..fd97e43cd1 100644 --- a/pyscf/pbc/dft/kgks.py +++ b/pyscf/pbc/dft/kgks.py @@ -84,7 +84,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, ni = ks._numint n, exc, vxc = ni.get_vxc(cell, ks.grids, ks.xc, dm, hermi=hermi, kpts=kpts, kpts_band=kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) nkpts = len(kpts) diff --git a/pyscf/pbc/dft/krks.py b/pyscf/pbc/dft/krks.py index 572a7614af..3cd23636b1 100644 --- a/pyscf/pbc/dft/krks.py +++ b/pyscf/pbc/dft/krks.py @@ -69,7 +69,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_rks(ks.with_df, ks.xc, dm, hermi, kpts, kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -84,7 +84,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, max_memory = ks.max_memory - lib.current_memory()[0] n, exc, vxc = ni.nr_rks(cell, ks.grids, ks.xc, dm, 0, hermi, kpts, kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) if ks.nlc or ni.libxc.is_nlc(ks.xc): if ni.libxc.is_nlc(ks.xc): xc = ks.xc @@ -95,7 +95,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec with nlc grids = %s', n) + logger.info(ks, 'nelec with nlc grids = %s', n) t0 = logger.timer(ks, 'vxc', *t0) nkpts = len(kpts) diff --git a/pyscf/pbc/dft/krks_ksymm.py b/pyscf/pbc/dft/krks_ksymm.py index 
fb15bf6f40..0d9e1401e2 100644 --- a/pyscf/pbc/dft/krks_ksymm.py +++ b/pyscf/pbc/dft/krks_ksymm.py @@ -59,7 +59,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_rks(ks.with_df, ks.xc, dm_bz, hermi, kpts.kpts, kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -72,7 +72,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = ni.nr_rks(cell, ks.grids, ks.xc, dm_bz, kpts=kpts.kpts, kpts_band=kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) if ks.nlc or ni.libxc.is_nlc(ks.xc): if ni.libxc.is_nlc(ks.xc): xc = ks.xc @@ -83,7 +83,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, 0, hermi, kpts.kpts, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec with nlc grids = %s', n) + logger.info(ks, 'nelec with nlc grids = %s', n) t0 = logger.timer(ks, 'vxc', *t0) weight = kpts.weights_ibz diff --git a/pyscf/pbc/dft/kuks.py b/pyscf/pbc/dft/kuks.py index a07949ccca..634c99f8ff 100644 --- a/pyscf/pbc/dft/kuks.py +++ b/pyscf/pbc/dft/kuks.py @@ -55,7 +55,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_uks(ks.with_df, ks.xc, dm, hermi, kpts, kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -79,7 +79,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, 0, hermi, kpts, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) nkpts = len(kpts) 
diff --git a/pyscf/pbc/dft/kuks_ksymm.py b/pyscf/pbc/dft/kuks_ksymm.py index eb02e674e9..15c2a623b5 100644 --- a/pyscf/pbc/dft/kuks_ksymm.py +++ b/pyscf/pbc/dft/kuks_ksymm.py @@ -58,7 +58,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_uks(ks.with_df, ks.xc, dm_bz, hermi, kpts.kpts, kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -71,7 +71,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = ni.nr_uks(cell, ks.grids, ks.xc, dm_bz, kpts=kpts.kpts, kpts_band=kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) if ks.nlc or ni.libxc.is_nlc(ks.xc): if ni.libxc.is_nlc(ks.xc): xc = ks.xc @@ -82,7 +82,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, 0, hermi, kpts.kpts, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec with nlc grids = %s', n) + logger.info(ks, 'nelec with nlc grids = %s', n) t0 = logger.timer(ks, 'vxc', *t0) weight = kpts.weights_ibz diff --git a/pyscf/pbc/dft/multigrid/__init__.py b/pyscf/pbc/dft/multigrid/__init__.py new file mode 100644 index 0000000000..707853bf51 --- /dev/null +++ b/pyscf/pbc/dft/multigrid/__init__.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .multigrid import MultiGridFFTDF +from .multigrid import ( + multigrid_fftdf as multigrid_fftdf, + _gen_rhf_response as _gen_rhf_response, + _gen_uhf_response as _gen_uhf_response, + nr_rks as nr_rks_v1, + nr_rks_fxc as nr_rks_fxc, + nr_rks_fxc_st as nr_rks_fxc_st, + nr_uks as nr_uks_v1, + nr_uks_fxc as nr_uks_fxc +) + +from .multigrid_pair import MultiGridFFTDF2 +from .multigrid_pair import nr_rks as nr_rks_v2 +from .multigrid_pair import nr_uks as nr_uks_v2 + +def nr_rks(mydf, xc_code, dm_kpts, hermi=1, kpts=None, + kpts_band=None, with_j=False, return_j=False, verbose=None): + if isinstance(mydf, MultiGridFFTDF2): + return nr_rks_v2(mydf, xc_code, dm_kpts, hermi=hermi, kpts=kpts, + kpts_band=kpts_band, with_j=with_j, + return_j=return_j, verbose=verbose) + elif isinstance(mydf, MultiGridFFTDF): + return nr_rks_v1(mydf, xc_code, dm_kpts, hermi=hermi, kpts=kpts, + kpts_band=kpts_band, with_j=with_j, + return_j=return_j, verbose=verbose) + else: + raise TypeError("Wrong density fitting type for multigrid DFT.") + +def nr_uks(mydf, xc_code, dm_kpts, hermi=1, kpts=None, + kpts_band=None, with_j=False, return_j=False, verbose=None): + if isinstance(mydf, MultiGridFFTDF2): + return nr_uks_v2(mydf, xc_code, dm_kpts, hermi=hermi, kpts=kpts, + kpts_band=kpts_band, with_j=with_j, + return_j=return_j, verbose=verbose) + elif isinstance(mydf, MultiGridFFTDF): + return nr_uks_v1(mydf, xc_code, dm_kpts, hermi=hermi, kpts=kpts, + kpts_band=kpts_band, with_j=with_j, + return_j=return_j, verbose=verbose) + else: + raise TypeError("Wrong density fitting type for multigrid DFT.") diff --git a/pyscf/pbc/dft/multigrid.py b/pyscf/pbc/dft/multigrid/multigrid.py similarity index 95% rename from pyscf/pbc/dft/multigrid.py rename to pyscf/pbc/dft/multigrid/multigrid.py index 80e72e551b..56fb3059cf 100644 --- a/pyscf/pbc/dft/multigrid.py +++ 
b/pyscf/pbc/dft/multigrid/multigrid.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ import numpy import scipy.linalg +from pyscf import __config__ from pyscf import lib from pyscf.lib import logger from pyscf.gto import ATOM_OF, ANG_OF, NPRIM_OF, PTR_EXP, PTR_COEFF @@ -29,12 +30,21 @@ from pyscf.pbc import tools from pyscf.pbc import gto from pyscf.pbc.gto import pseudo +from pyscf.pbc.gto.pseudo import pp_int from pyscf.pbc.dft import numint, gen_grid -from pyscf.pbc.df.df_jk import _format_dms, _format_kpts_band, _format_jks +from pyscf.pbc.df.df_jk import ( + _format_dms, + _format_kpts_band, + _format_jks, +) from pyscf.pbc.lib.kpts_helper import gamma_point -from pyscf.pbc.df import fft -from pyscf.pbc.df import ft_ao -from pyscf import __config__ +from pyscf.pbc.df import fft, ft_ao +from pyscf.pbc.dft.multigrid.utils import ( + _take_4d, + _take_5d, + _takebak_4d, + _takebak_5d, +) #sys.stderr.write('WARN: multigrid is an experimental feature. It is still in ' # 'testing\nFeatures and APIs may be changed in the future.\n') @@ -367,23 +377,31 @@ def get_nuc(mydf, kpts=None): vne = vne[0] return numpy.asarray(vne) -def get_pp(mydf, kpts=None): +def get_pp(mydf, kpts=None, max_memory=4000): '''Get the periodic pseudotential nuc-el AO matrix, with G=0 removed. 
''' from pyscf import gto kpts, is_single_kpt = fft._check_kpts(mydf, kpts) cell = mydf.cell mesh = mydf.mesh - SI = cell.get_SI() Gv = cell.get_Gv(mesh) - vpplocG = pseudo.get_vlocG(cell, Gv) - vpplocG = -numpy.einsum('ij,ij->j', SI, vpplocG) - # from get_jvloc_G0 function - vpplocG[0] = numpy.sum(pseudo.get_alphas(cell)) - ngrids = len(vpplocG) + + ngrids = len(Gv) + vpplocG = numpy.empty((ngrids,), dtype=numpy.complex128) + + mem_avail = max(max_memory, mydf.max_memory-lib.current_memory()[0]) + blksize = int(mem_avail*1e6/((cell.natm*2)*16)) + blksize = min(ngrids, max(21**3, blksize)) + for ig0, ig1 in lib.prange(0, ngrids, blksize): + vpplocG_batch = pp_int.get_gth_vlocG_part1(cell, Gv[ig0:ig1]) + SI = cell.get_SI(Gv[ig0:ig1]) + vpplocG[ig0:ig1] = -numpy.einsum('ij,ij->j', SI, vpplocG_batch) hermi = 1 vpp = _get_j_pass2(mydf, vpplocG, hermi, kpts)[0] + vpp2 = pp_int.get_pp_loc_part2(cell, kpts) + for k, kpt in enumerate(kpts): + vpp[k] += vpp2[k] # vppnonloc evaluated in reciprocal space fakemol = gto.Mole() @@ -396,51 +414,76 @@ def get_pp(mydf, kpts=None): fakemol._bas[0,gto.PTR_EXP ] = ptr+3 fakemol._bas[0,gto.PTR_COEFF] = ptr+4 - # buf for SPG_lmi upto l=0..3 and nl=3 - buf = numpy.empty((48,ngrids), dtype=numpy.complex128) - def vppnl_by_k(kpt): - Gk = Gv + kpt - G_rad = lib.norm(Gk, axis=1) - aokG = ft_ao.ft_ao(cell, Gv, kpt=kpt) * (ngrids/cell.vol) - vppnl = 0 + SPG_lm_aoGs = [] for ia in range(cell.natm): symb = cell.atom_symbol(ia) if symb not in cell._pseudo: + SPG_lm_aoGs.append(None) continue pp = cell._pseudo[symb] p1 = 0 for l, proj in enumerate(pp[5:]): rl, nl, hl = proj if nl > 0: - fakemol._bas[0,gto.ANG_OF] = l - fakemol._env[ptr+3] = .5*rl**2 - fakemol._env[ptr+4] = rl**(l+1.5)*numpy.pi**1.25 - pYlm_part = fakemol.eval_gto('GTOval', Gk) + p1 = p1+nl*(l*2+1) + SPG_lm_aoGs.append(numpy.zeros((p1, cell.nao), dtype=numpy.complex128)) - p0, p1 = p1, p1+nl*(l*2+1) - # pYlm is real, SI[ia] is complex - pYlm = numpy.ndarray((nl,l*2+1,ngrids), 
dtype=numpy.complex128, buffer=buf[p0:p1]) - for k in range(nl): - qkl = pseudo.pp._qli(G_rad*rl, l, k) - pYlm[k] = pYlm_part.T * qkl - #:SPG_lmi = numpy.einsum('g,nmg->nmg', SI[ia].conj(), pYlm) - #:SPG_lm_aoG = numpy.einsum('nmg,gp->nmp', SPG_lmi, aokG) - #:tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) - #:vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) - if p1 > 0: - SPG_lmi = buf[:p1] - SPG_lmi *= SI[ia].conj() - SPG_lm_aoGs = lib.zdot(SPG_lmi, aokG) + mem_avail = max(max_memory, mydf.max_memory-lib.current_memory()[0]) + blksize = int(mem_avail*1e6/((48+cell.nao+13+3)*16)) + blksize = min(ngrids, max(21**3, blksize)) + vppnl = 0 + for ig0, ig1 in lib.prange(0, ngrids, blksize): + ng = ig1 - ig0 + # buf for SPG_lmi upto l=0..3 and nl=3 + buf = numpy.empty((48,ng), dtype=numpy.complex128) + Gk = Gv[ig0:ig1] + kpt + G_rad = numpy.linalg.norm(Gk, axis=1) + aokG = ft_ao.ft_ao(cell, Gv[ig0:ig1], kpt=kpt) * (ngrids/cell.vol) + for ia in range(cell.natm): + symb = cell.atom_symbol(ia) + if symb not in cell._pseudo: + continue + pp = cell._pseudo[symb] p1 = 0 for l, proj in enumerate(pp[5:]): rl, nl, hl = proj if nl > 0: + fakemol._bas[0,gto.ANG_OF] = l + fakemol._env[ptr+3] = .5*rl**2 + fakemol._env[ptr+4] = rl**(l+1.5)*numpy.pi**1.25 + pYlm_part = fakemol.eval_gto('GTOval', Gk) + p0, p1 = p1, p1+nl*(l*2+1) - hl = numpy.asarray(hl) - SPG_lm_aoG = SPG_lm_aoGs[p0:p1].reshape(nl,l*2+1,-1) - tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) - vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) + # pYlm is real, SI[ia] is complex + pYlm = numpy.ndarray((nl,l*2+1,ng), dtype=numpy.complex128, buffer=buf[p0:p1]) + for k in range(nl): + qkl = pseudo.pp._qli(G_rad*rl, l, k) + pYlm[k] = pYlm_part.T * qkl + #:SPG_lmi = numpy.einsum('g,nmg->nmg', SI[ia].conj(), pYlm) + #:SPG_lm_aoG = numpy.einsum('nmg,gp->nmp', SPG_lmi, aokG) + #:tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) + #:vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) + if p1 > 0: 
+ SPG_lmi = buf[:p1] + SPG_lmi *= cell.get_SI(Gv[ig0:ig1], atmlst=[ia,]).conj() + SPG_lm_aoGs[ia] += lib.zdot(SPG_lmi, aokG) + buf = None + for ia in range(cell.natm): + symb = cell.atom_symbol(ia) + if symb not in cell._pseudo: + continue + pp = cell._pseudo[symb] + p1 = 0 + for l, proj in enumerate(pp[5:]): + rl, nl, hl = proj + if nl > 0: + p0, p1 = p1, p1+nl*(l*2+1) + hl = numpy.asarray(hl) + SPG_lm_aoG = SPG_lm_aoGs[ia][p0:p1].reshape(nl,l*2+1,-1) + tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) + vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) + SPG_lm_aoGs=None return vppnl * (1./ngrids**2) for k, kpt in enumerate(kpts): @@ -454,7 +497,6 @@ def vppnl_by_k(kpt): vpp = vpp[0] return numpy.asarray(vpp) - def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None): '''Get the Coulomb (J) AO matrix at sampled k-points. @@ -1859,7 +1901,7 @@ def get_jk(self, dm, hermi=1, kpts=None, kpts_band=None, get_rho = get_rho -def multigrid(mf): +def multigrid_fftdf(mf): '''Use MultiGridFFTDF to replace the default FFTDF integration method in the DFT object. 
''' @@ -1867,56 +1909,7 @@ def multigrid(mf): mf.with_df.__dict__.update(old_df.__dict__) return mf +multigrid = multigrid_fftdf # for backward compatibility def _pgto_shells(cell): return cell._bas[:,NPRIM_OF].sum() - -def _take_4d(a, indices): - a_shape = a.shape - ranges = [] - for i, s in enumerate(indices): - if s is None: - idx = numpy.arange(a_shape[i], dtype=numpy.int32) - else: - idx = numpy.asarray(s, dtype=numpy.int32) - idx[idx < 0] += a_shape[i] - ranges.append(idx) - idx = ranges[0][:,None] * a_shape[1] + ranges[1] - idy = ranges[2][:,None] * a_shape[3] + ranges[3] - a = a.reshape(a_shape[0]*a_shape[1], a_shape[2]*a_shape[3]) - out = lib.take_2d(a, idx.ravel(), idy.ravel()) - return out.reshape([len(s) for s in ranges]) - -def _takebak_4d(out, a, indices): - out_shape = out.shape - a_shape = a.shape - ranges = [] - for i, s in enumerate(indices): - if s is None: - idx = numpy.arange(a_shape[i], dtype=numpy.int32) - else: - idx = numpy.asarray(s, dtype=numpy.int32) - idx[idx < 0] += out_shape[i] - assert (len(idx) == a_shape[i]) - ranges.append(idx) - idx = ranges[0][:,None] * out_shape[1] + ranges[1] - idy = ranges[2][:,None] * out_shape[3] + ranges[3] - nx = idx.size - ny = idy.size - out = out.reshape(out_shape[0]*out_shape[1], out_shape[2]*out_shape[3]) - lib.takebak_2d(out, a.reshape(nx,ny), idx.ravel(), idy.ravel()) - return out - -def _take_5d(a, indices): - a_shape = a.shape - a = a.reshape((a_shape[0]*a_shape[1],) + a_shape[2:]) - indices = (None,) + indices[2:] - return _take_4d(a, indices) - -def _takebak_5d(out, a, indices): - a_shape = a.shape - out_shape = out.shape - a = a.reshape((a_shape[0]*a_shape[1],) + a_shape[2:]) - out = out.reshape((out_shape[0]*out_shape[1],) + out_shape[2:]) - indices = (None,) + indices[2:] - return _takebak_4d(out, a, indices) diff --git a/pyscf/pbc/dft/multigrid/multigrid_pair.py b/pyscf/pbc/dft/multigrid/multigrid_pair.py new file mode 100644 index 0000000000..3ef43b688d --- /dev/null +++ 
def gradient_gs(f_gs, Gv):
    r'''Compute the G-space components of :math:`\nabla f(r)`
    given :math:`f(G)` and :math:`G`,
    which is equivalent to einsum('np,px->nxp', f_gs, 1j*Gv)

    Args:
        f_gs : array_like
            Values of f(G); reshaped to (n, ng) where ng = len(Gv).
        Gv : (ng, 3) array_like
            Plane-wave vectors.

    Returns:
        (n, 3, ng) complex128 array filled by the C routine ``gradient_gs``.

    Raises:
        RuntimeError: the C routine is missing from libdft or the call failed.
    '''
    # Convert first so that plain sequences (which have no .shape/.reshape)
    # are accepted as well as ndarrays.
    Gv = np.asarray(Gv, order='C', dtype=np.double)
    ng, dim = Gv.shape
    assert dim == 3
    f_gs = np.asarray(np.asarray(f_gs).reshape(-1,ng), order='C', dtype=np.complex128)
    n = f_gs.shape[0]
    out = np.empty((n,dim,ng), dtype=np.complex128)

    fn = getattr(libdft, 'gradient_gs', None)
    if fn is None:
        # Report the real cause instead of "'NoneType' object is not callable".
        raise RuntimeError('Error in gradient_gs: symbol not found in libdft')
    try:
        fn(out.ctypes.data_as(ctypes.c_void_p),
           f_gs.ctypes.data_as(ctypes.c_void_p),
           Gv.ctypes.data_as(ctypes.c_void_p),
           ctypes.c_int(n), ctypes.c_size_t(ng))
    except Exception as e:
        raise RuntimeError(f'Error in gradient_gs: {e}') from e
    return out
def multi_grids_tasks(cell, ke_cutoff=None, hermi=0,
                      ngrids=NGRIDS, ke_ratio=KE_RATIO, rel_cutoff=REL_CUTOFF):
    '''Build the multigrid task list for `cell`.

    A ladder of `ngrids` kinetic-energy cutoffs is generated by repeatedly
    dividing `ke_cutoff` (default: `cell.ke_cutoff`) by `ke_ratio`; the ladder
    is ordered from the smallest cutoff to the largest.  Each cutoff is turned
    into a mesh with `tools.cutoff_to_mesh`, and the resulting grid-level info
    is handed to `build_task_list`.

    Raises:
        ValueError: neither `ke_cutoff` nor `cell.ke_cutoff` is set.
    '''
    ke_top = cell.ke_cutoff if ke_cutoff is None else ke_cutoff
    if ke_top is None:
        raise ValueError("cell.ke_cutoff is not set.")

    # Largest cutoff first while building; reversed afterwards.
    ladder = [ke_top]
    for _ in range(ngrids-1):
        ladder.append(ladder[-1] / ke_ratio)
    cutoff = ladder[::-1]

    latt = cell.lattice_vectors()
    mesh = [tools.cutoff_to_mesh(latt, ke) for ke in cutoff]

    logger.info(cell, 'ke_cutoff for multigrid tasks:\n%s', cutoff)
    logger.info(cell, 'meshes for multigrid tasks:\n%s', mesh)

    level_info = init_gridlevel_info(cutoff, rel_cutoff, mesh)
    return build_task_list(cell, level_info, hermi=hermi)
def init_gridlevel_info(cutoff, rel_cutoff, mesh):
    '''Allocate and fill the C ``GridLevel_Info`` structure.

    Args:
        cutoff : sequence of float
            Kinetic-energy cutoff of every grid level.  A leading entry
            smaller than 1e-15 is discarded.
        rel_cutoff : float
            Relative cutoff forwarded to the C routine.
        mesh : array_like, reshaped to (-1, 3)
            Mesh of every grid level.

    Returns:
        ctypes pointer to the ``GridLevel_Info`` filled by
        ``libdft.init_gridlevel_info``.

    Raises:
        ValueError: no grid level given, or fewer meshes than levels.
        RuntimeError: the C call failed.
    '''
    cutoff = np.asarray(cutoff, dtype=np.double).ravel()
    if cutoff.size == 0:
        raise ValueError("cutoff must contain at least one grid level.")
    mesh = np.asarray(mesh).reshape(-1,3)

    if cutoff[0] < 1e-15:
        # When meshes were given one-per-cutoff, the mesh belonging to the
        # discarded level has to go too; otherwise every remaining level
        # would be paired with its neighbour's mesh.
        if len(mesh) == len(cutoff):
            mesh = mesh[1:]
        cutoff = cutoff[1:]

    cutoff = np.asarray(cutoff, order='C', dtype=np.double)
    mesh = np.asarray(mesh, order='C', dtype=np.int32)
    nlevels = len(cutoff)
    if len(mesh) < nlevels:
        # The C side is handed a raw pointer plus nlevels; refuse input that
        # would make it read beyond the mesh buffer.
        raise ValueError("Got %d meshes for %d grid levels." % (len(mesh), nlevels))

    gridlevel_info = ctypes.POINTER(GridLevel_Info)()
    fn = getattr(libdft, "init_gridlevel_info", None)
    try:
        fn(ctypes.byref(gridlevel_info),
           cutoff.ctypes.data_as(ctypes.c_void_p),
           mesh.ctypes.data_as(ctypes.c_void_p),
           ctypes.c_int(nlevels), ctypes.c_double(rel_cutoff))
    except Exception as e:
        raise RuntimeError("Failed to init grid level info. %s" % e) from e
    return gridlevel_info
def build_task_list(cell, gridlevel_info, cell1=None, Ls=None, hermi=0, precision=None):
    '''
    Build the task list for multigrid DFT calculations.

    Arguments:
        cell : :class:`pbc.gto.cell.Cell`
            The :class:`Cell` instance for the bra basis functions.
        gridlevel_info : :class:`ctypes.POINTER`
            The C pointer of the :class:`GridLevel_Info` structure.
        cell1 : :class:`pbc.gto.cell.Cell`, optional
            The :class:`Cell` instance for the ket basis functions.
            If not given, both bra and ket basis functions come from cell.
        Ls : (*,3) array, optional
            The cartesian coordinates of the periodic images.
            Default is calculated by :func:`cell.get_lattice_Ls`.
        hermi : int, optional
            If :math:`hermi=1`, the task list is built only for
            the upper triangle of the matrix. Default is 0.
        precision : float, optional
            The integral precision. Default is :attr:`cell.precision`.

    Returns: :class:`ctypes.POINTER`
        The C pointer of the :class:`TaskList` structure.

    Raises:
        RuntimeError: the C routine ``build_task_list`` failed.
    '''
    from pyscf.pbc.gto import build_neighbor_list_for_shlpairs, free_neighbor_list
    if cell1 is None:
        cell1 = cell
    if Ls is None:
        Ls = cell.get_lattice_Ls()
    # The raw buffer of Ls is handed to C below, so make sure it really is a
    # C-contiguous double array (also accepts nested lists from callers).
    Ls = np.asarray(Ls, order='C', dtype=np.double)
    if precision is None:
        precision = cell.precision

    if hermi == 1 and cell1 is not cell:
        logger.warn(cell,
                    "Set hermi=0 because cell and cell1 are not the same.")
        hermi = 0

    ish_atm = np.asarray(cell._atm, order='C', dtype=np.int32)
    ish_bas = np.asarray(cell._bas, order='C', dtype=np.int32)
    ish_env = np.asarray(cell._env, order='C', dtype=float)
    nish = len(ish_bas)
    ish_rcut, ipgf_rcut = cell.rcut_by_shells(precision=precision,
                                              return_pgf_radius=True)
    assert nish == len(ish_rcut)
    ptr_ipgf_rcut = lib.ndarray_pointer_2d(ipgf_rcut)

    if cell1 is cell:
        # Same basis on both sides: reuse the bra arrays for the ket.
        jsh_atm = ish_atm
        jsh_bas = ish_bas
        jsh_env = ish_env
        jsh_rcut = ish_rcut
        jpgf_rcut = ipgf_rcut
        ptr_jpgf_rcut = ptr_ipgf_rcut
    else:
        jsh_atm = np.asarray(cell1._atm, order='C', dtype=np.int32)
        jsh_bas = np.asarray(cell1._bas, order='C', dtype=np.int32)
        jsh_env = np.asarray(cell1._env, order='C', dtype=float)
        jsh_rcut, jpgf_rcut = cell1.rcut_by_shells(precision=precision,
                                                   return_pgf_radius=True)
        ptr_jpgf_rcut = lib.ndarray_pointer_2d(jpgf_rcut)
    njsh = len(jsh_bas)
    assert njsh == len(jsh_rcut)

    nl = build_neighbor_list_for_shlpairs(cell, cell1, Ls=Ls,
                                          ish_rcut=ish_rcut, jsh_rcut=jsh_rcut,
                                          hermi=hermi)

    task_list = ctypes.POINTER(TaskList)()
    func = getattr(libdft, "build_task_list", None)
    try:
        func(ctypes.byref(task_list),
             ctypes.byref(nl), ctypes.byref(gridlevel_info),
             ish_atm.ctypes.data_as(ctypes.c_void_p),
             ish_bas.ctypes.data_as(ctypes.c_void_p),
             ish_env.ctypes.data_as(ctypes.c_void_p),
             ish_rcut.ctypes.data_as(ctypes.c_void_p),
             ptr_ipgf_rcut,
             jsh_atm.ctypes.data_as(ctypes.c_void_p),
             jsh_bas.ctypes.data_as(ctypes.c_void_p),
             jsh_env.ctypes.data_as(ctypes.c_void_p),
             jsh_rcut.ctypes.data_as(ctypes.c_void_p),
             ptr_jpgf_rcut,
             ctypes.c_int(nish), ctypes.c_int(njsh),
             Ls.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_double(precision), ctypes.c_int(hermi))
    except Exception as e:
        raise RuntimeError("Failed to build task list. %s" % e) from e
    finally:
        # Release the neighbor list on the failure path as well.
        free_neighbor_list(nl)
    return task_list
naoi = ao_loc0[i1] - ao_loc0[i0] + if hermi == 1: + ao_loc1 = ao_loc0 + else: + key1 = 'cart' if cell1.cart else 'sph' + ao_loc1 = moleintor.make_loc(jsh_bas, key1) + naoj = ao_loc1[j1] - ao_loc1[j0] + + dm = np.asarray(dm, order='C') + assert dm.shape[-2:] == (naoi, naoj) + + if dimension is None: + dimension = cell0.dimension + assert dimension == getattr(cell1, "dimension", None) + + if Ls is None and dimension > 0: + Ls = np.asarray(cell0.get_lattice_Ls(), order='C') + elif Ls is None and dimension == 0: + Ls = np.zeros((1,3)) + + if dimension == 0 or kpts is None or gamma_point(kpts): + nkpts, nimgs = 1, Ls.shape[0] + dm = dm.reshape(-1,1,naoi,naoj) + else: + expkL = np.exp(1j*kpts.reshape(-1,3).dot(Ls.T)) + nkpts, nimgs = expkL.shape + dm = dm.reshape(-1,nkpts,naoi,naoj) + n_dm = dm.shape[0] + + #TODO check if cell1 has the same lattice vectors + if a is None: + a = cell0.lattice_vectors() + b = np.linalg.inv(a.T) + + if abs(a-np.diag(a.diagonal())).max() < 1e-12: + lattice_type = '_orth' + else: + lattice_type = '_nonorth' + xctype = xctype.upper() + if xctype == 'LDA': + comp = 1 + elif xctype == 'GGA': + if hermi == 1: + raise RuntimeError('hermi=1 is not supported for GGA functional') + comp = 4 + else: + raise NotImplementedError('meta-GGA') + + eval_fn = 'make_rho_' + xctype.lower() + lattice_type + drv = getattr(libdft, "grid_collocate_drv", None) + + def make_rho_(rs_rho, dm): + try: + drv(getattr(libdft, eval_fn, None), + ctypes.byref(rs_rho), + dm.ctypes.data_as(ctypes.c_void_p), + ctypes.byref(task_list), + ctypes.c_int(comp), ctypes.c_int(hermi), + (ctypes.c_int*4)(i0, i1, j0, j1), + ao_loc0.ctypes.data_as(ctypes.c_void_p), + ao_loc1.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(dimension), + Ls.ctypes.data_as(ctypes.c_void_p), + a.ctypes.data_as(ctypes.c_void_p), + b.ctypes.data_as(ctypes.c_void_p), + ish_atm.ctypes.data_as(ctypes.c_void_p), + ish_bas.ctypes.data_as(ctypes.c_void_p), + ish_env.ctypes.data_as(ctypes.c_void_p), + 
jsh_atm.ctypes.data_as(ctypes.c_void_p), + jsh_bas.ctypes.data_as(ctypes.c_void_p), + jsh_env.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(cell0.cart)) + except Exception as e: + raise RuntimeError("Failed to compute rho. %s" % e) + return rs_rho + + gridlevel_info = task_list.contents.gridlevel_info + rho = [] + for i, dm_i in enumerate(dm): + rs_rho = init_rs_grid(gridlevel_info, comp) + if dimension == 0 or kpts is None or gamma_point(kpts): + make_rho_(rs_rho, dm_i) + else: + raise NotImplementedError + rho.append(rs_rho) + + if n_dm == 1: + rho = rho[0] + return rho + + +def _eval_rhoG(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), deriv=0, + rhog_high_order=RHOG_HIGH_ORDER): + assert(deriv < 2) + cell = mydf.cell + + dm_kpts = np.asarray(dm_kpts, order='C') + dms = _format_dms(dm_kpts, kpts) + nset, nkpts, nao = dms.shape[:3] + + task_list = _update_task_list(mydf, hermi=hermi, ngrids=mydf.ngrids, + ke_ratio=mydf.ke_ratio, rel_cutoff=mydf.rel_cutoff) + + gga_high_order = False + if deriv == 0: + xctype = 'LDA' + rhodim = 1 + elif deriv == 1: + if rhog_high_order: + xctype = 'GGA' + rhodim = 4 + else: # approximate high order derivatives in reciprocal space + gga_high_order = True + xctype = 'LDA' + rhodim = 1 + deriv = 0 + assert(hermi == 1 or gamma_point(kpts)) + elif deriv == 2: # meta-GGA + raise NotImplementedError + assert(hermi == 1 or gamma_point(kpts)) + + ignore_imag = (hermi == 1) + + rs_rho = eval_rho(cell, dms, task_list, hermi=hermi, xctype=xctype, kpts=kpts, + ignore_imag=ignore_imag) + + nx, ny, nz = mydf.mesh + rhoG = np.zeros((nset*rhodim,nx,ny,nz), dtype=np.complex128) + nlevels = task_list.contents.nlevels + meshes = task_list.contents.gridlevel_info.contents.mesh + meshes = np.ctypeslib.as_array(meshes, shape=(nlevels,3)) + for ilevel in range(nlevels): + mesh = meshes[ilevel] + ngrids = np.prod(mesh) + if nset > 1: + rho = [] + for i in range(nset): + rho.append(np.ctypeslib.as_array(rs_rho[i].contents.data[ilevel], shape=(ngrids,))) + 
def eval_mat(cell, weights, task_list, shls_slice=None, comp=1, hermi=0, deriv=0,
             xctype='LDA', kpts=None, grid_level=None, dimension=None, mesh=None,
             cell1=None, shls_slice1=None, Ls=None, a=None):
    '''Integrate real-space weights on one grid level into AO matrices by
    calling the C driver ``grid_integrate_drv``.

    Args:
        cell : Cell providing the bra basis (and defaults for mesh, lattice).
        weights : double array
            LDA: 1D for a single matrix, otherwise one row per matrix.
            GGA: 2D (reshaped to (-1, 4, ngrids)) for a single matrix,
            otherwise the leading axis enumerates the matrices.
        task_list : ctypes pointer to the TaskList.
        shls_slice, shls_slice1 : (start, stop) shell ranges for bra / ket;
            default is all shells of the respective cell.
        comp : number of matrix components (must be 3 when deriv == 1).
        hermi : forwarded to the C driver; hermi == 1 requires identical
            bra and ket cell and shell ranges.
        deriv : 0, or 1 to select the ``_ip1`` kernel (requires hermi == 0).
        xctype : 'LDA' or 'GGA'.
        kpts : only None / gamma point is implemented.
        grid_level : index of the grid level to integrate on (required).
        dimension, mesh, cell1, Ls, a : optional overrides of the values
            otherwise taken from `cell`.

    Returns:
        A single matrix of shape (naoi, naoj) (or (comp, naoi, naoj)) when a
        single set of weights was given, otherwise a list of such matrices.

    Raises:
        RuntimeError: grid_level is missing or the C driver failed.
        NotImplementedError: non-gamma k-points, xctype other than LDA/GGA,
            or deriv > 1.
    '''
    if grid_level is None:
        # Previously this surfaced as a TypeError from ctypes.c_int(None)
        # wrapped in an unrelated "Failed to compute rho" message.
        raise RuntimeError("Failed to compute mat. grid_level is not specified.")

    cell0 = cell
    shls_slice0 = shls_slice
    if cell1 is None:
        cell1 = cell0

    if mesh is None:
        mesh = cell0.mesh

    #TODO mixture of cartesian and spherical bases
    assert cell0.cart == cell1.cart

    ish_atm = np.asarray(cell0._atm, order='C', dtype=np.int32)
    ish_bas = np.asarray(cell0._bas, order='C', dtype=np.int32)
    ish_env = np.asarray(cell0._env, order='C', dtype=np.double)
    # NOTE(review): np.asarray may return cell0._env itself, in which case
    # this write also modifies the cell's env -- confirm that is intended.
    ish_env[PTR_EXPDROP] = min(cell0.precision*EXTRA_PREC, EXPDROP)

    if cell1 is cell0:
        jsh_atm = ish_atm
        jsh_bas = ish_bas
        jsh_env = ish_env
    else:
        jsh_atm = np.asarray(cell1._atm, order='C', dtype=np.int32)
        jsh_bas = np.asarray(cell1._bas, order='C', dtype=np.int32)
        jsh_env = np.asarray(cell1._env, order='C', dtype=np.double)
        jsh_env[PTR_EXPDROP] = min(cell1.precision*EXTRA_PREC, EXPDROP)

    if shls_slice0 is None:
        shls_slice0 = (0, cell0.nbas)
    i0, i1 = shls_slice0
    if shls_slice1 is None:
        shls_slice1 = (0, cell1.nbas)
    j0, j1 = shls_slice1

    if hermi == 1:
        assert cell1 is cell0
        assert i0 == j0 and i1 == j1

    key0 = 'cart' if cell0.cart else 'sph'
    ao_loc0 = moleintor.make_loc(ish_bas, key0)
    naoi = ao_loc0[i1] - ao_loc0[i0]
    if hermi == 1:
        ao_loc1 = ao_loc0
    else:
        key1 = 'cart' if cell1.cart else 'sph'
        ao_loc1 = moleintor.make_loc(jsh_bas, key1)
    naoj = ao_loc1[j1] - ao_loc1[j0]

    if dimension is None:
        dimension = cell0.dimension
    assert dimension == getattr(cell1, "dimension", None)

    # Only the gamma-point path exists; reject anything else before doing
    # any further set-up.
    if not (dimension == 0 or kpts is None or gamma_point(kpts)):
        raise NotImplementedError

    if Ls is None and dimension > 0:
        Ls = cell0.get_lattice_Ls()
    elif Ls is None and dimension == 0:
        Ls = np.zeros((1,3))
    # Raw buffers are passed to C: enforce layout and dtype.
    Ls = np.asarray(Ls, order='C', dtype=np.double)

    #TODO check if cell1 has the same lattice vectors
    if a is None:
        a = cell0.lattice_vectors()
    a = np.asarray(a, order='C', dtype=np.double)
    b = np.linalg.inv(a.T)

    if abs(a-np.diag(a.diagonal())).max() < 1e-12:
        lattice_type = '_orth'
    else:
        lattice_type = '_nonorth'

    weights = np.asarray(weights, order='C')
    assert(weights.dtype == np.double)
    xctype = xctype.upper()
    n_mat = None  # stays None when a single set of weights was supplied
    if xctype == 'LDA':
        if weights.ndim == 1:
            weights = weights.reshape(-1, np.prod(mesh))
        else:
            n_mat = weights.shape[0]
    elif xctype == 'GGA':
        if weights.ndim == 2:
            weights = weights.reshape(-1, 4, np.prod(mesh))
        else:
            n_mat = weights.shape[0]
    else:
        raise NotImplementedError

    eval_fn = 'eval_mat_' + xctype.lower() + lattice_type
    if deriv > 0:
        if deriv == 1:
            assert comp == 3
            assert hermi == 0
            eval_fn += '_ip1'
        else:
            raise NotImplementedError
    drv = getattr(libdft, "grid_integrate_drv", None)

    def make_mat(wv):
        if comp == 1:
            mat = np.zeros((naoi, naoj))
        else:
            mat = np.zeros((comp, naoi, naoj))

        try:
            drv(getattr(libdft, eval_fn, None),
                mat.ctypes.data_as(ctypes.c_void_p),
                wv.ctypes.data_as(ctypes.c_void_p),
                ctypes.byref(task_list),
                ctypes.c_int(comp), ctypes.c_int(hermi),
                ctypes.c_int(grid_level),
                (ctypes.c_int*4)(i0, i1, j0, j1),
                ao_loc0.ctypes.data_as(ctypes.c_void_p),
                ao_loc1.ctypes.data_as(ctypes.c_void_p),
                ctypes.c_int(dimension),
                Ls.ctypes.data_as(ctypes.c_void_p),
                a.ctypes.data_as(ctypes.c_void_p),
                b.ctypes.data_as(ctypes.c_void_p),
                ish_atm.ctypes.data_as(ctypes.c_void_p),
                ish_bas.ctypes.data_as(ctypes.c_void_p),
                ish_env.ctypes.data_as(ctypes.c_void_p),
                jsh_atm.ctypes.data_as(ctypes.c_void_p),
                jsh_bas.ctypes.data_as(ctypes.c_void_p),
                jsh_env.ctypes.data_as(ctypes.c_void_p),
                ctypes.c_int(cell0.cart))
        except Exception as e:
            raise RuntimeError("Failed to compute mat. %s" % e) from e
        return mat

    out = [make_mat(wv) for wv in weights]

    if n_mat is None:
        out = out[0]
    return out
v_rs = tools.ifft(sub_vG, mesh).reshape(nset,ngrids) + vR = np.asarray(v_rs.real, order='C') + vI = np.asarray(v_rs.imag, order='C') + if at_gamma_point: + v_rs = vR + + mat = eval_mat(cell, vR, task_list, comp=1, hermi=hermi, + xctype='LDA', kpts=kpts, grid_level=ilevel, mesh=mesh) + vj_kpts += np.asarray(mat).reshape(nset,-1,nao,nao) + if not at_gamma_point and abs(vI).max() > IMAG_TOL: + raise NotImplementedError + + if nset == 1: + vj_kpts = vj_kpts[0] + return vj_kpts + + +def _get_j_pass2_ip1(mydf, vG, kpts=np.zeros((1,3)), hermi=0, deriv=1, verbose=None): + if deriv == 1: + comp = 3 + assert hermi == 0 + else: + raise NotImplementedError + + cell = mydf.cell + nkpts = len(kpts) + nao = cell.nao_nr() + nx, ny, nz = mydf.mesh + vG = vG.reshape(-1,nx,ny,nz) + nset = vG.shape[0] + + task_list = _update_task_list(mydf, hermi=hermi, ngrids=mydf.ngrids, + ke_ratio=mydf.ke_ratio, rel_cutoff=mydf.rel_cutoff) + + at_gamma_point = gamma_point(kpts) + if at_gamma_point: + vj_kpts = np.zeros((nset,nkpts,comp,nao,nao)) + else: + vj_kpts = np.zeros((nset,nkpts,comp,nao,nao), dtype=np.complex128) + + nlevels = task_list.contents.nlevels + meshes = task_list.contents.gridlevel_info.contents.mesh + meshes = np.ctypeslib.as_array(meshes, shape=(nlevels,3)) + for ilevel in range(nlevels): + mesh = meshes[ilevel] + ngrids = np.prod(mesh) + + gx = np.fft.fftfreq(mesh[0], 1./mesh[0]).astype(np.int32) + gy = np.fft.fftfreq(mesh[1], 1./mesh[1]).astype(np.int32) + gz = np.fft.fftfreq(mesh[2], 1./mesh[2]).astype(np.int32) + sub_vG = _take_4d(vG, (None, gx, gy, gz)).reshape(nset,ngrids) + + v_rs = tools.ifft(sub_vG, mesh).reshape(nset,ngrids) + if at_gamma_point: + vR = np.asarray(v_rs.real, order='C', dtype=float) + #vI = None + else: + raise NotImplementedError + + mat = eval_mat(cell, vR, task_list, comp=comp, hermi=hermi, deriv=deriv, + xctype='LDA', kpts=kpts, grid_level=ilevel, mesh=mesh) + mat = np.asarray(mat).reshape(nset,-1,comp,nao,nao) + vj_kpts = np.add(vj_kpts, mat, 
def _get_gga_pass2(mydf, vG, kpts=np.zeros((1,3)), hermi=1, verbose=None):
    '''Assemble the GGA potential matrix from G-space weights.

    `vG` is reshaped to (nset, 4, nx, ny, nz) with (nx, ny, nz) = mydf.mesh.
    For every grid level of the task list the matching Fourier components are
    extracted, transformed to real space (real part only) and integrated with
    `eval_mat(..., xctype='GGA')`; the per-level matrices are summed.

    Returns:
        (nset, nkpts, nao, nao) array, or (nkpts, nao, nao) when nset == 1.

    Raises:
        NotImplementedError: `kpts` is not the gamma point.
    '''
    cell = mydf.cell
    nkpts = len(kpts)
    nao = cell.nao_nr()
    nx, ny, nz = mydf.mesh
    vG = vG.reshape(-1,4,nx,ny,nz)
    nset = vG.shape[0]

    task_list = _update_task_list(mydf, hermi=hermi, ngrids=mydf.ngrids,
                                  ke_ratio=mydf.ke_ratio, rel_cutoff=mydf.rel_cutoff)

    # Only the gamma point is supported.  Check once, before any FFT or
    # integration work, instead of after the first level was processed.
    if not gamma_point(kpts):
        raise NotImplementedError
    veff = np.zeros((nset,nkpts,nao,nao))

    nlevels = task_list.contents.nlevels
    meshes = task_list.contents.gridlevel_info.contents.mesh
    meshes = np.ctypeslib.as_array(meshes, shape=(nlevels,3))
    for ilevel in range(nlevels):
        mesh = meshes[ilevel]
        ngrids = np.prod(mesh)

        # Integer frequencies of this level, used to pick the matching
        # components out of the full-mesh array.
        gx = np.fft.fftfreq(mesh[0], 1./mesh[0]).astype(np.int32)
        gy = np.fft.fftfreq(mesh[1], 1./mesh[1]).astype(np.int32)
        gz = np.fft.fftfreq(mesh[2], 1./mesh[2]).astype(np.int32)
        sub_vG = _take_5d(vG, (None, None, gx, gy, gz)).reshape(-1,ngrids)
        wv = tools.ifft(sub_vG, mesh).real.reshape(nset,4,ngrids)
        wv = np.asarray(wv, order='C')

        mat = eval_mat(cell, wv, task_list, comp=1, hermi=hermi,
                       xctype='GGA', kpts=kpts, grid_level=ilevel, mesh=mesh)
        veff += np.asarray(mat).reshape(nset,-1,nao,nao)

    if nset == 1:
        veff = veff[0]
    return veff
def _rks_gga_wv0(rho, vxc, weight):
    '''Weighted GGA potential components for the spin-restricted case.

    Row 0 of the result is ``weight * vrho``; rows 1..3 are
    ``(weight * 2) * (vgamma * rho[i])`` for i = 1, 2, 3, where
    ``vrho, vgamma = vxc[:2]``.

    Returns:
        (4, vrho.size) float array.
    '''
    vrho, vgamma = vxc[:2]
    wv = np.empty((4, vrho.size))

    np.multiply(weight, vrho, out=wv[0])

    doubled = weight * 2
    for comp in (1, 2, 3):
        row = wv[comp]
        np.multiply(vgamma, rho[comp], out=row)
        np.multiply(doubled, row, out=row)
    return wv
def _rks_gga_wv0_pw(cell, rho, vxc, weight, mesh):
    '''Spin-restricted GGA potential in G-space via the C routine
    ``get_gga_vrho_gs``.

    ``vrho`` and the three products ``vgamma * rho[i]`` (i = 1..3) are Fourier
    transformed with `tools.fft` and passed, together with the plane-wave
    vectors of `mesh` and `weight`, to the C routine.  The author's reference
    expression for the result is
    ``weight * (vrho_freq - 2j * einsum('px,xp->p', Gv, buf_freq))``.

    Returns:
        (ngrid,) complex128 array with ngrid = vrho.size.
    '''
    vrho, vgamma = vxc[:2]
    ngrid = vrho.size

    # vgamma * d(rho)/dx, /dy, /dz
    grad_term = np.empty((3,ngrid))
    for k in (1, 2, 3):
        np.multiply(vgamma, rho[k], out=grad_term[k-1])

    vrho_G = tools.fft(vrho, mesh).reshape((1,ngrid))
    grad_G = tools.fft(grad_term, mesh).reshape((3,ngrid))
    Gv = cell.get_Gv(mesh)

    out = np.empty((ngrid,), order="C", dtype=np.complex128)
    kernel = getattr(libdft, 'get_gga_vrho_gs', None)
    pointers = [arr.ctypes.data_as(ctypes.c_void_p)
                for arr in (out, vrho_G, grad_G, Gv)]
    kernel(*pointers, ctypes.c_double(weight), ctypes.c_int(ngrid))
    return out
def nr_rks(mydf, xc_code, dm_kpts, hermi=1, kpts=None,
           kpts_band=None, with_j=False, return_j=False, verbose=None):
    '''
    Same as multigrid.nr_rks, but considers Hermitian symmetry also for GGA

    Args:
        mydf : density-fitting object providing cell, mesh, _numint,
            vpplocG_part1 and the multigrid settings.
        xc_code : functional description passed to ``mydf._numint``.
        dm_kpts : density matrix / matrices.
        hermi : forwarded to the density and potential builders.
        kpts : k-points of `dm_kpts`; default ``mydf.kpts``.
        kpts_band : k-points at which the potential is evaluated.
        with_j : add the Coulomb potential to the returned matrix.
        return_j : also compute the Coulomb matrix and attach it as ``vj``.

    Returns:
        (nelec, excsum, veff) where veff is tagged with ``ecoul``, ``exc``,
        ``vj`` and ``vk`` (always None).

    Raises:
        NotImplementedError: functional type other than LDA or GGA.
    '''
    if kpts is None: kpts = mydf.kpts
    log = logger.new_logger(mydf, verbose)
    cell = mydf.cell
    dm_kpts = np.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]
    # input_band keeps what the caller passed (possibly None); _format_jks
    # needs it to choose the output shape, so it is formatted only once.
    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band

    ni = mydf._numint
    xctype = ni._xc_type(xc_code)
    if xctype == 'LDA':
        deriv = 0
    elif xctype == 'GGA':
        deriv = 1
    else:
        # Fail before any work is done (deriv used to be left unbound here).
        raise NotImplementedError
    rhoG = _eval_rhoG(mydf, dm_kpts, hermi, kpts, deriv)

    mesh = mydf.mesh
    ngrids = np.prod(mesh)

    coulG = tools.get_coulG(cell, mesh=mesh)
    #vG = np.einsum('ng,g->ng', rhoG[:,0], coulG)
    vG = np.empty_like(rhoG[:,0], dtype=np.result_type(rhoG[:,0], coulG))
    for i, rhoG_i in enumerate(rhoG[:,0]):
        vG[i] = np.multiply(rhoG_i, coulG, out=vG[i])
    coulG = None

    if mydf.vpplocG_part1 is not None:
        for i in range(nset):
            #vG[i] += mydf.vpplocG_part1 * 2
            vG[i] = np.add(vG[i], np.multiply(2., mydf.vpplocG_part1), out=vG[i])

    #ecoul = .5 * np.einsum('ng,ng->n', rhoG[:,0].real, vG.real)
    #ecoul+= .5 * np.einsum('ng,ng->n', rhoG[:,0].imag, vG.imag)
    ecoul = np.zeros((rhoG.shape[0],))
    for i in range(rhoG.shape[0]):
        ecoul[i] = .5 * np.vdot(rhoG[i,0], vG[i]).real

    ecoul /= cell.vol
    log.debug('Multigrid Coulomb energy %s', ecoul)

    if mydf.vpplocG_part1 is not None:
        # Twice vpplocG_part1 was added for the energy above; taking one
        # back leaves a single copy in the potential.
        for i in range(nset):
            #vG[i] -= mydf.vpplocG_part1
            vG[i] = np.subtract(vG[i], mydf.vpplocG_part1, out=vG[i])

    weight = cell.vol / ngrids
    # *(1./weight) because rhoR is scaled by weight in _eval_rhoG. When
    # computing rhoR with IFFT, the weight factor is not needed.
    rhoR = tools.ifft(rhoG.reshape(-1,ngrids), mesh).real * (1./weight)
    rhoR = rhoR.reshape(nset,-1,ngrids)
    use_fft_gga = GGA_METHOD.upper() == 'FFT'
    wv_freq = []
    nelec = np.zeros(nset)
    excsum = np.zeros(nset)
    for i in range(nset):
        exc, vxc = ni.eval_xc(xc_code, rhoR[i], spin=0, deriv=1)[:2]
        if xctype == 'LDA':
            wv = np.multiply(weight, vxc[0])
            wv_freq.append(tools.fft(wv, mesh))
        elif use_fft_gga:
            wv_freq.append(_rks_gga_wv0_pw(cell, rhoR[i], vxc, weight, mesh).reshape(1,ngrids))
        else:
            wv = _rks_gga_wv0(rhoR[i], vxc, weight)
            wv_freq.append(tools.fft(wv, mesh))
        wv = None

        nelec[i] += np.sum(rhoR[i,0]) * weight
        excsum[i] += np.sum(np.multiply(rhoR[i,0], exc)) * weight
        exc = vxc = None

    rhoR = rhoG = None

    if len(wv_freq) == 1:
        wv_freq = wv_freq[0].reshape(nset,-1,*mesh)
    else:
        wv_freq = np.asarray(wv_freq).reshape(nset,-1,*mesh)

    if nset == 1:
        ecoul = ecoul[0]
        nelec = nelec[0]
        excsum = excsum[0]
    log.debug('Multigrid exc %s nelec %s', excsum, nelec)

    if xctype == 'LDA':
        if with_j:
            wv_freq[:,0] += vG.reshape(nset,*mesh)
        veff = _get_j_pass2(mydf, wv_freq, kpts_band, verbose=log)
    else:  # GGA
        if with_j:
            #wv_freq[:,0] += vG.reshape(nset,*mesh)
            wv_freq[:,0] = np.add(wv_freq[:,0], vG.reshape(nset,*mesh), out=wv_freq[:,0])
        if use_fft_gga:
            veff = _get_j_pass2(mydf, wv_freq, kpts_band, verbose=log)
        else:
            veff = _get_gga_pass2(mydf, wv_freq, kpts_band, hermi=hermi, verbose=log)
    wv_freq = None
    veff = _format_jks(veff, dm_kpts, input_band, kpts)

    if return_j:
        vj = _get_j_pass2(mydf, vG, kpts_band, verbose=log)
        # Format the Coulomb matrix itself (veff was formatted here before,
        # which threw the computed vj away).
        vj = _format_jks(vj, dm_kpts, input_band, kpts)
    else:
        vj = None
    vG = None

    veff = lib.tag_array(veff, ecoul=ecoul, exc=excsum, vj=vj, vk=None)
    return nelec, excsum, veff
kpts_band=None, with_j=False, return_j=False, verbose=None): + if kpts is None: kpts = mydf.kpts + log = logger.new_logger(mydf, verbose) + cell = mydf.cell + dm_kpts = np.asarray(dm_kpts, order='C') + dms = _format_dms(dm_kpts, kpts) + nset, nkpts, nao = dms.shape[:3] + nset //= 2 + kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band + + mesh = mydf.mesh + ngrids = np.prod(mesh) + ni = mydf._numint + xctype = ni._xc_type(xc_code) + if xctype == 'LDA': + deriv = 0 + elif xctype == 'GGA': + deriv = 1 + + rhoG = _eval_rhoG(mydf, dm_kpts, hermi, kpts, deriv) + rhoG = rhoG.reshape(nset,2,-1,ngrids) + + coulG = tools.get_coulG(cell, mesh=mesh) + #vG = np.einsum('nsg,g->ng', rhoG[:,:,0], coulG) + vG = np.empty((nset,ngrids), dtype=np.result_type(rhoG[:,:,0], coulG)) + for i, rhoG_i in enumerate(rhoG[:,:,0]): + vG[i] = np.multiply(np.add(rhoG_i[0], rhoG_i[1]), coulG, out=vG[i]) + coulG = None + + if mydf.vpplocG_part1 is not None: + for i in range(nset): + #vG[i] += mydf.vpplocG_part1 * 2 + vG[i] = np.add(vG[i], np.multiply(2., mydf.vpplocG_part1), out=vG[i]) + + ecoul = np.zeros(nset) + for i in range(nset): + ecoul[i] = .5 * np.vdot(np.add(rhoG[i,0,0], rhoG[i,1,0]), vG[i]).real + + ecoul /= cell.vol + log.debug('Multigrid Coulomb energy %s', ecoul) + + if mydf.vpplocG_part1 is not None: + for i in range(nset): + #vG[i] -= mydf.vpplocG_part1 + vG[i] = np.subtract(vG[i], mydf.vpplocG_part1, out=vG[i]) + + weight = cell.vol / ngrids + # *(1./weight) because rhoR is scaled by weight in _eval_rhoG. When + # computing rhoR with IFFT, the weight factor is not needed. 
+ rhoR = tools.ifft(rhoG.reshape(-1,ngrids), mesh).real * (1./weight) + rhoR = rhoR.reshape(nset,2,-1,ngrids) + wv_freq = [] + nelec = np.zeros(nset) + excsum = np.zeros(nset) + for i in range(nset): + exc, vxc = ni.eval_xc(xc_code, rhoR[i], spin=1, deriv=1)[:2] + if xctype == 'LDA': + wv = np.multiply(weight, vxc[0].T) + wv_freq.append(tools.fft(wv, mesh)) + wv = None + elif xctype == 'GGA': + if GGA_METHOD.upper() == 'FFT': + wv_freq.append(_uks_gga_wv0_pw(cell, rhoR[i], vxc, weight, mesh)) + else: + wv = _uks_gga_wv0(rhoR[i], vxc, weight) + wv_freq.append(tools.fft(wv.reshape(-1,*mesh), mesh)) + wv = None + else: + raise NotImplementedError + + nelec[i] += np.sum(rhoR[i,:,0]).sum() * weight + excsum[i] += np.sum(np.multiply(np.add(rhoR[i,0,0],rhoR[i,1,0]), exc)) * weight + exc = vxc = None + + rhoR = rhoG = None + + if len(wv_freq) == 1: + wv_freq = wv_freq[0].reshape(nset,2,-1,*mesh) + else: + wv_freq = np.asarray(wv_freq).reshape(nset,2,-1,*mesh) + + if nset == 1: + ecoul = ecoul[0] + nelec = nelec[0] + excsum = excsum[0] + log.debug('Multigrid exc %s nelec %s', excsum, nelec) + + kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band + if xctype == 'LDA': + if with_j: + for s in range(2): + wv_freq[:,s,0] += vG.reshape(nset,*mesh) + veff = _get_j_pass2(mydf, wv_freq, kpts_band, verbose=log) + elif xctype == 'GGA': + if with_j: + #wv_freq[:,:,0] += vG.reshape(nset,*mesh) + for s in range(2): + wv_freq[:,s,0] = np.add(wv_freq[:,s,0], vG.reshape(nset,*mesh), out=wv_freq[:,s,0]) + if GGA_METHOD.upper() == 'FFT': + veff = _get_j_pass2(mydf, wv_freq, kpts_band, verbose=log) + else: + veff = _get_gga_pass2(mydf, wv_freq, kpts_band, hermi=hermi, verbose=log) + wv_freq = None + veff = _format_jks(veff, dm_kpts, input_band, kpts) + + if return_j: + vj = _get_j_pass2(mydf, vG, kpts_band, verbose=log) + vj = _format_jks(veff, dm_kpts, input_band, kpts) + else: + vj = None + vG = None + + veff = lib.tag_array(veff, ecoul=ecoul, exc=excsum, vj=vj, vk=None) + 
return nelec, excsum, veff + +def get_veff_ip1(mydf, dm_kpts, xc_code=None, kpts=np.zeros((1,3)), kpts_band=None, spin=0): + cell = mydf.cell + dm_kpts = np.asarray(dm_kpts, order='C') + dms = _format_dms(dm_kpts, kpts) + nset, nkpts, nao = dms.shape[:3] + kpts_band = _format_kpts_band(kpts_band, kpts) + if spin == 1: + nset //= 2 + + mesh = mydf.mesh + ngrids = np.prod(mesh) + ni = mydf._numint + xctype = ni._xc_type(xc_code) + if xctype == 'LDA': + deriv = 0 + elif xctype == 'GGA': + deriv = 1 + rhoG = _eval_rhoG(mydf, dm_kpts, hermi=1, kpts=kpts_band, deriv=deriv) + if spin == 1: + rhoG = rhoG.reshape(nset,2,-1,ngrids) + # cache rhoG for core density gradients + mydf.rhoG = rhoG + + coulG = tools.get_coulG(cell, mesh=mesh) + vG = np.empty((nset,ngrids), dtype=np.result_type(rhoG, coulG)) + for i in range(nset): + if spin == 0: + vG[i] = np.multiply(rhoG[i,0], coulG, out=vG[i]) + elif spin == 1: + tmp = np.add(rhoG[i,0,0], rhoG[i,1,0]) + vG[i] = np.multiply(tmp, coulG, out=vG[i]) + + if mydf.vpplocG_part1 is not None: + for i in range(nset): + vG[i] = np.add(vG[i], mydf.vpplocG_part1, out=vG[i]) + + weight = cell.vol / ngrids + + # *(1./weight) because rhoR is scaled by weight in _eval_rhoG. When + # computing rhoR with IFFT, the weight factor is not needed. 
+ rhoR = tools.ifft(rhoG.reshape(-1,ngrids), mesh).real * (1./weight) + if spin == 0: + rhoR = rhoR.reshape(nset,-1,ngrids) + elif spin == 1: + rhoR = rhoR.reshape(nset,2,-1,ngrids) + + wv_freq = [] + for i in range(nset): + exc, vxc = ni.eval_xc(xc_code, rhoR[i], spin=spin, deriv=1)[:2] + if spin == 0: + if xctype == 'LDA': + wv = np.multiply(weight, vxc[0]) + wv_freq.append(tools.fft(wv, mesh)) + wv = None + elif xctype == 'GGA': + if GGA_METHOD.upper() == 'FFT': + wv_freq.append(_rks_gga_wv0_pw(cell, rhoR[i], vxc, weight, mesh).reshape(1,ngrids)) + else: + wv = _rks_gga_wv0(rhoR[i], vxc, weight) + wv_freq.append(tools.fft(wv, mesh)) + else: + raise NotImplementedError + elif spin == 1: + if xctype == 'LDA': + wv = np.multiply(weight, vxc[0].T) + wv_freq.append(tools.fft(wv, mesh)) + wv = None + elif xctype == 'GGA': + if GGA_METHOD.upper() == 'FFT': + wv_freq.append(_uks_gga_wv0_pw(cell, rhoR[i], vxc, weight, mesh)) + else: + wv = _uks_gga_wv0(rhoR[i], vxc, weight) + wv_freq.append(tools.fft(wv.reshape(-1,*mesh), mesh)) + wv = None + else: + raise NotImplementedError + + rhoR = rhoG = None + if spin == 0: + if len(wv_freq) == 1: + wv_freq = wv_freq[0].reshape(nset,-1,*mesh) + else: + wv_freq = np.asarray(wv_freq).reshape(nset,-1,*mesh) + elif spin == 1: + if len(wv_freq) == 1: + wv_freq = wv_freq[0].reshape(nset,2,-1,*mesh) + else: + wv_freq = np.asarray(wv_freq).reshape(nset,2,-1,*mesh) + + for i in range(nset): + if spin == 0: + wv_freq[i,0] = np.add(wv_freq[i,0], vG[i].reshape(*mesh), out=wv_freq[i,0]) + elif spin == 1: + for s in range(2): + wv_freq[i,s,0] = np.add(wv_freq[i,s,0], vG[i].reshape(*mesh), out=wv_freq[i,s,0]) + + if xctype == 'LDA': + vj_kpts = _get_j_pass2_ip1(mydf, wv_freq, kpts_band, hermi=0, deriv=1) + elif xctype == 'GGA': + if GGA_METHOD.upper() == 'FFT': + vj_kpts = _get_j_pass2_ip1(mydf, wv_freq, kpts_band, hermi=0, deriv=1) + else: + vj_kpts = _get_gga_pass2_ip1(mydf, wv_freq, kpts_band, hermi=0, deriv=1) + else: + raise 
NotImplementedError + + comp = 3 + nao = cell.nao + if spin == 0: + vj_kpts = vj_kpts.reshape(nset,nkpts,comp,nao,nao) + elif spin == 1: + vj_kpts = vj_kpts.reshape(nset,2,nkpts,comp,nao,nao) + vj_kpts = np.moveaxis(vj_kpts, -3, -4) + + if nkpts == 1: + vj_kpts = vj_kpts[...,0,:,:] + if nset == 1: + vj_kpts = vj_kpts[0] + return vj_kpts + + +class MultiGridFFTDF2(MultiGridFFTDF): + ''' + Base class for multigrid DFT (version 2). + + Attributes: + task_list : TaskList instance + Task list recording which primitive basis function pairs + need to be considered. + vpplocG_part1 : arrary + Short-range part of the local pseudopotential represented + in the reciprocal space. It is cached to reduce cost. + rhoG : array + Electronic density represented in the reciprocal space. + It is cached in nuclear gradient calculations to reduce cost. + ''' + ngrids = getattr(__config__, 'pbc_dft_multigrid_ngrids', 4) + ke_ratio = getattr(__config__, 'pbc_dft_multigrid_ke_ratio', 3.0) + rel_cutoff = getattr(__config__, 'pbc_dft_multigrid_rel_cutoff', 20.0) + _keys = {'ngrids', 'ke_ratio', 'rel_cutoff', + 'task_list', 'vpplocG_part1', 'rhoG'} + + def __init__(self, cell, kpts=np.zeros((1,3))): + fft.FFTDF.__init__(self, cell, kpts) + self.task_list = None + self.vpplocG_part1 = None + self.rhoG = None + if not gamma_point(kpts): + raise NotImplementedError('MultiGridFFTDF2 only supports Gamma-point calculations.') + a = cell.lattice_vectors() + if abs(a-np.diag(a.diagonal())).max() > 1e-12: + raise NotImplementedError('MultiGridFFTDF2 only supports orthorhombic lattices.') + + def reset(self, cell=None): + self.vpplocG_part1 = None + self.rhoG = None + if self.task_list is not None: + free_task_list(self.task_list) + self.task_list = None + fft.FFTDF.reset(self, cell=cell) + + def __del__(self): + self.reset() + + def get_veff_ip1(self, dm, xc_code=None, kpts=None, kpts_band=None, spin=0): + if kpts is None: + if self.kpts is None: + kpts = np.zeros(1,3) + else: + kpts = self.kpts + 
kpts = kpts.reshape(-1,3) + vj = get_veff_ip1(self, dm, xc_code=xc_code, + kpts=kpts, kpts_band=kpts_band, spin=spin) + return vj + + def get_pp(self, kpts=None): + '''Compute the GTH pseudopotential matrix, which includes + the second part of the local potential and the non-local potential. + The first part of the local potential is cached as `vpplocG_part1`, + which is the reciprocal space representation, to be added to the electron + density for computing the Coulomb matrix. + In order to get the full PP matrix, the potential due to `vpplocG_part1` + needs to be added. + ''' + self.vpplocG_part1 = _get_vpplocG_part1(self, with_rho_core=True) + return _get_pp_without_erf(self, kpts) + + vpploc_part1_nuc_grad = vpploc_part1_nuc_grad diff --git a/pyscf/pbc/dft/multigrid/pp.py b/pyscf/pbc/dft/multigrid/pp.py new file mode 100644 index 0000000000..13c0813dac --- /dev/null +++ b/pyscf/pbc/dft/multigrid/pp.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Author: Xing Zhang +# + +import ctypes +import numpy +from pyscf import __config__ +from pyscf import lib, gto +from pyscf.lib import logger +from pyscf.pbc import tools +from pyscf.pbc.gto import pseudo +from pyscf.pbc.gto.pseudo import pp_int +from pyscf.pbc.lib.kpts_helper import gamma_point + +PP_WITH_RHO_CORE = getattr(__config__, 'pbc_dft_multigrid_pp_with_rho_core', True) + +libpbc = lib.load_library('libpbc') +libdft = lib.load_library('libdft') + +def make_rho_core(cell, mesh=None, precision=None, atm_id=None): + if mesh is None: + mesh = cell.mesh + fakecell, max_radius = fake_cell_vloc_part1(cell, atm_id=atm_id, precision=precision) + atm = fakecell._atm + bas = fakecell._bas + env = fakecell._env + + a = numpy.asarray(cell.lattice_vectors(), order='C', dtype=float) + if abs(a - numpy.diag(a.diagonal())).max() < 1e-12: + lattice_type = '_orth' + else: + lattice_type = '_nonorth' + raise NotImplementedError + eval_fn = 'make_rho_lda' + lattice_type + + b = numpy.asarray(numpy.linalg.inv(a.T), order='C', dtype=float) + mesh = numpy.asarray(mesh, order='C', dtype=numpy.int32) + rho_core = numpy.zeros((numpy.prod(mesh),), order='C', dtype=float) + drv = getattr(libdft, 'build_core_density', None) + try: + drv(getattr(libdft, eval_fn), + rho_core.ctypes.data_as(ctypes.c_void_p), + atm.ctypes.data_as(ctypes.c_void_p), + bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(len(bas)), + env.ctypes.data_as(ctypes.c_void_p), + mesh.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(cell.dimension), + a.ctypes.data_as(ctypes.c_void_p), + b.ctypes.data_as(ctypes.c_void_p), ctypes.c_double(max_radius)) + except Exception as e: + raise RuntimeError("Failed to compute rho_core. %s" % e) + return rho_core + + +def _get_pp_without_erf(mydf, kpts=None): + '''Get the periodic pseudotential nuc-el AO matrix, with G=0 removed. 
+ ''' + cell = mydf.cell + if kpts is None: + kpts_lst = numpy.zeros((1,3)) + else: + kpts_lst = numpy.reshape(kpts, (-1,3)) + + vpp = pp_int.get_pp_loc_part2(cell, kpts_lst) + vppnl = pp_int.get_pp_nl(cell, kpts_lst) + + for k, kpt in enumerate(kpts_lst): + if gamma_point(kpt): + vpp[k] = vpp[k].real + vppnl[k].real + else: + vpp[k] += vppnl[k] + vppnl = None + + if kpts is None or numpy.shape(kpts) == (3,): + vpp = vpp[0] + return numpy.asarray(vpp) + + +def get_pp_loc_part1_gs(cell, Gv): + coulG = tools.get_coulG(cell, Gv=Gv) + G2 = numpy.einsum('ix,ix->i', Gv, Gv) + G0idx = numpy.where(G2==0)[0] + ngrid = len(G2) + Gv = numpy.asarray(Gv, order='C', dtype=numpy.double) + coulG = numpy.asarray(coulG, order='C', dtype=numpy.double) + G2 = numpy.asarray(G2, order='C', dtype=numpy.double) + + coords = cell.atom_coords() + coords = numpy.asarray(coords, order='C', dtype=numpy.double) + Z = numpy.empty([cell.natm,], order='C', dtype=numpy.double) + rloc = numpy.empty([cell.natm,], order='C', dtype=numpy.double) + for ia in range(cell.natm): + Z[ia] = cell.atom_charge(ia) + symb = cell.atom_symbol(ia) + if symb in cell._pseudo: + rloc[ia] = cell._pseudo[symb][1] + else: + rloc[ia] = -999 + + out = numpy.empty((ngrid,), order='C', dtype=numpy.complex128) + fn = getattr(libpbc, "pp_loc_part1_gs", None) + try: + fn(out.ctypes.data_as(ctypes.c_void_p), + coulG.ctypes.data_as(ctypes.c_void_p), + Gv.ctypes.data_as(ctypes.c_void_p), + G2.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(G0idx), ctypes.c_int(ngrid), + Z.ctypes.data_as(ctypes.c_void_p), + coords.ctypes.data_as(ctypes.c_void_p), + rloc.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(cell.natm)) + except Exception as e: + raise RuntimeError("Failed to get vlocG part1. 
%s" % e) + return out + + +def _get_vpplocG_part1(mydf, with_rho_core=PP_WITH_RHO_CORE): + cell = mydf.cell + mesh = mydf.mesh + + if not with_rho_core: + # compute rho_core directly in G-space + # this is much slower that the following + Gv = cell.get_Gv(mesh) + vpplocG_part1 = get_pp_loc_part1_gs(cell, Gv) + else: + # compute rho_core in real space then transform to G-space + weight = cell.vol / numpy.prod(mesh) + rho_core = make_rho_core(cell) + rhoG_core = weight * tools.fft(rho_core, mesh) + rho_core = None + coulG = tools.get_coulG(cell, mesh=mesh) + vpplocG_part1 = rhoG_core * coulG + rhoG_core = coulG = None + # G = 0 contribution + chargs = cell.atom_charges() + rloc = [] + for ia in range(cell.natm): + symb = cell.atom_symbol(ia) + rloc.append(cell._pseudo[symb][1]) + rloc = numpy.asarray(rloc) + vpplocG_part1[0] += 2. * numpy.pi * numpy.sum(rloc * rloc * chargs) + return vpplocG_part1 + + +def get_vpploc_part1_ip1(mydf, kpts=numpy.zeros((1,3))): + from .multigrid_pair import _get_j_pass2_ip1 + if mydf.pp_with_erf: + return 0 + + mesh = mydf.mesh + vG = mydf.vpplocG_part1 + vG.reshape(-1,*mesh) + + vpp_kpts = _get_j_pass2_ip1(mydf, vG, kpts, hermi=0, deriv=1) + if gamma_point(kpts): + vpp_kpts = vpp_kpts.real + if len(kpts) == 1: + vpp_kpts = vpp_kpts[0] + return vpp_kpts + + +def vpploc_part1_nuc_grad(mydf, dm, kpts=numpy.zeros((1,3)), atm_id=None, precision=None): + from .multigrid_pair import _eval_rhoG + t0 = (logger.process_clock(), logger.perf_counter()) + cell = mydf.cell + fakecell, max_radius = fake_cell_vloc_part1(cell, atm_id=atm_id, precision=precision) + atm = fakecell._atm + bas = fakecell._bas + env = fakecell._env + + a = numpy.asarray(cell.lattice_vectors(), order='C', dtype=float) + if abs(a - numpy.diag(a.diagonal())).max() < 1e-12: + lattice_type = '_orth' + else: + lattice_type = '_nonorth' + raise NotImplementedError + eval_fn = 'eval_mat_lda' + lattice_type + '_ip1' + + b = numpy.asarray(numpy.linalg.inv(a.T), order='C', 
dtype=float) + mesh = numpy.asarray(mydf.mesh, order='C', dtype=numpy.int32) + ngrids = numpy.prod(mesh) + comp = 3 + grad = numpy.zeros((len(atm),comp), order="C", dtype=float) + drv = getattr(libdft, 'int_gauss_charge_v_rs', None) + + if mydf.rhoG is None: + rhoG = _eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=0) + else: + rhoG = mydf.rhoG + rhoG = rhoG[...,0,:] + rhoG = rhoG.reshape(-1,ngrids) + if rhoG.shape[0] == 2: #unrestricted + rhoG = rhoG[0] + rhoG[1] + else: + assert rhoG.shape[0] == 1 + rhoG = rhoG[0] + + coulG = tools.get_coulG(cell, mesh=mesh) + vG = numpy.multiply(rhoG, coulG) + + v_rs = numpy.asarray(tools.ifft(vG, mesh).real, order="C") + try: + drv(getattr(libdft, eval_fn), + grad.ctypes.data_as(ctypes.c_void_p), + v_rs.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(comp), + atm.ctypes.data_as(ctypes.c_void_p), + bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(len(bas)), + env.ctypes.data_as(ctypes.c_void_p), + mesh.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(cell.dimension), + a.ctypes.data_as(ctypes.c_void_p), + b.ctypes.data_as(ctypes.c_void_p), ctypes.c_double(max_radius)) + except Exception as e: + raise RuntimeError("Failed to computed nuclear gradients of vpploc part1. %s" % e) + grad *= -1 + t0 = logger.timer(mydf, 'vpploc_part1_nuc_grad', *t0) + return grad + + +def fake_cell_vloc_part1(cell, atm_id=None, precision=None): + ''' + Generate fakecell for the non-local term of the local part of + the GTH pseudo-potential. Also stores the atomic radii. + Differs from pp_int.fake_cell_vloc(cell, cn=0) in the normalization factors. 
+ ''' + from pyscf.pbc.gto.cell import pgf_rcut + if atm_id is None: + atm_id = numpy.arange(cell.natm) + else: + atm_id = numpy.asarray(atm_id) + natm = len(atm_id) + + if precision is None: + precision = cell.precision + + max_radius = 0 + kind = {} + # FIXME prec may be too tight + prec = precision ** 2 + for symb in cell._pseudo: + charge = numpy.sum(cell._pseudo[symb][0]) + rloc = cell._pseudo[symb][1] + zeta = .5 / rloc**2 + norm = (zeta / numpy.pi) ** 1.5 + radius = pgf_rcut(0, zeta, charge*norm, precision=prec) + max_radius = max(radius, max_radius) + kind[symb] = [zeta, norm, radius] + + fake_env = [cell.atom_coords()[atm_id].ravel()] + fake_atm = cell._atm[atm_id].copy().reshape(natm,-1) + fake_atm[:,gto.PTR_COORD] = numpy.arange(0, natm*3, 3) + ptr = natm * 3 + fake_bas = [] + for ia, atm in enumerate(atm_id): + if cell.atom_charge(atm) == 0: # pass ghost atoms + continue + + symb = cell.atom_symbol(atm) + if symb in kind: + fake_env.append(kind[symb]) + else: + alpha = 1e16 + norm = (alpha / numpy.pi) ** 1.5 + radius = 0.0 + fake_env.append([alpha, norm, radius]) + fake_bas.append([ia, 0, 1, 1, 0, ptr, ptr+1, 0]) + fake_atm[ia,gto.PTR_RADIUS] = ptr+2 + ptr += 3 + + fakecell = cell.copy(deep=False) + fakecell._atm = numpy.asarray(fake_atm, order="C", dtype=numpy.int32) + fakecell._bas = numpy.asarray(fake_bas, order="C", dtype=numpy.int32).reshape(-1, gto.BAS_SLOTS) + fakecell._env = numpy.asarray(numpy.hstack(fake_env), order="C", dtype=float) + return fakecell, max_radius diff --git a/pyscf/pbc/dft/multigrid/utils.py b/pyscf/pbc/dft/multigrid/utils.py new file mode 100644 index 0000000000..3ca9f0addb --- /dev/null +++ b/pyscf/pbc/dft/multigrid/utils.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Qiming Sun +# + +import numpy +from pyscf import lib + +def _take_4d(a, indices): + a_shape = a.shape + ranges = [] + for i, s in enumerate(indices): + if s is None: + idx = numpy.arange(a_shape[i], dtype=numpy.int32) + else: + idx = numpy.asarray(s, dtype=numpy.int32) + idx[idx < 0] += a_shape[i] + ranges.append(idx) + idx = ranges[0][:,None] * a_shape[1] + ranges[1] + idy = ranges[2][:,None] * a_shape[3] + ranges[3] + a = a.reshape(a_shape[0]*a_shape[1], a_shape[2]*a_shape[3]) + out = lib.take_2d(a, idx.ravel(), idy.ravel()) + return out.reshape([len(s) for s in ranges]) + +def _takebak_4d(out, a, indices): + out_shape = out.shape + a_shape = a.shape + ranges = [] + for i, s in enumerate(indices): + if s is None: + idx = numpy.arange(a_shape[i], dtype=numpy.int32) + else: + idx = numpy.asarray(s, dtype=numpy.int32) + idx[idx < 0] += out_shape[i] + assert (len(idx) == a_shape[i]) + ranges.append(idx) + idx = ranges[0][:,None] * out_shape[1] + ranges[1] + idy = ranges[2][:,None] * out_shape[3] + ranges[3] + nx = idx.size + ny = idy.size + out = out.reshape(out_shape[0]*out_shape[1], out_shape[2]*out_shape[3]) + lib.takebak_2d(out, a.reshape(nx,ny), idx.ravel(), idy.ravel()) + return out + +def _take_5d(a, indices): + a_shape = a.shape + a = a.reshape((a_shape[0]*a_shape[1],) + a_shape[2:]) + indices = (None,) + indices[2:] + return _take_4d(a, indices) + +def _takebak_5d(out, a, indices): + a_shape = a.shape + out_shape = out.shape + a = a.reshape((a_shape[0]*a_shape[1],) + a_shape[2:]) + out = out.reshape((out_shape[0]*out_shape[1],) + 
out_shape[2:]) + indices = (None,) + indices[2:] + return _takebak_4d(out, a, indices) diff --git a/pyscf/pbc/dft/rks.py b/pyscf/pbc/dft/rks.py index 228bc6e91a..d3dc8d1047 100644 --- a/pyscf/pbc/dft/rks.py +++ b/pyscf/pbc/dft/rks.py @@ -73,7 +73,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_rks(ks.with_df, ks.xc, dm, hermi, kpt.reshape(1,3), kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -87,7 +87,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, max_memory = ks.max_memory - lib.current_memory()[0] n, exc, vxc = ni.nr_rks(cell, ks.grids, ks.xc, dm, 0, hermi, kpt, kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) if ks.nlc or ni.libxc.is_nlc(ks.xc): if ni.libxc.is_nlc(ks.xc): xc = ks.xc @@ -98,7 +98,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec with nlc grids = %s', n) + logger.info(ks, 'nelec with nlc grids = %s', n) t0 = logger.timer(ks, 'vxc', *t0) if not hybrid: diff --git a/pyscf/pbc/dft/test/test_krks_ksym.py b/pyscf/pbc/dft/test/test_krks_ksym.py index 6c7bd46c4a..615f1d456f 100644 --- a/pyscf/pbc/dft/test/test_krks_ksym.py +++ b/pyscf/pbc/dft/test/test_krks_ksym.py @@ -207,14 +207,14 @@ def test_rsh_mdf(self): def test_multigrid(self): kmf0 = krks.KRKS(cell, kpts=cell.make_kpts(nk)) kmf0.xc = 'lda' - kmf0 = multigrid.multigrid(kmf0) + kmf0 = multigrid.multigrid_fftdf(kmf0) kmf0.kernel() rho0 = kmf0.get_rho() kpts = cell.make_kpts(nk,space_group_symmetry=True,time_reversal_symmetry=True) kmf = pscf.KRKS(cell, kpts=kpts) kmf.xc = 'lda' - kmf = multigrid.multigrid(kmf) + kmf = multigrid.multigrid_fftdf(kmf) kmf.kernel() 
self.assertAlmostEqual(kmf.e_tot, kmf0.e_tot, 7) rho = kmf.get_rho() @@ -231,14 +231,14 @@ def test_multigrid(self): def test_multigrid_kuks(self): kmf0 = pscf.KUKS(cell, kpts=cell.make_kpts(nk)) kmf0.xc = 'lda' - kmf0 = multigrid.multigrid(kmf0) + kmf0 = multigrid.multigrid_fftdf(kmf0) kmf0.kernel() rho0 = kmf0.get_rho() kpts = cell.make_kpts(nk,space_group_symmetry=True,time_reversal_symmetry=True) kmf = pscf.KUKS(cell, kpts=kpts) kmf.xc = 'lda' - kmf = multigrid.multigrid(kmf) + kmf = multigrid.multigrid_fftdf(kmf) kmf.kernel() self.assertAlmostEqual(kmf.e_tot, kmf0.e_tot, 7) rho = kmf.get_rho() diff --git a/pyscf/pbc/dft/test/test_multigrid.py b/pyscf/pbc/dft/test/test_multigrid.py index 2cd11e7732..9db362ded3 100644 --- a/pyscf/pbc/dft/test/test_multigrid.py +++ b/pyscf/pbc/dft/test/test_multigrid.py @@ -85,12 +85,24 @@ def test_orth_get_pp(self): self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 8) + # test small memory + mydf = multigrid.MultiGridFFTDF(cell_orth) + mydf.max_memory = 10 + out = mydf.get_pp(max_memory=2) + self.assertAlmostEqual(abs(ref-out).max(), 0, 8) + def test_nonorth_get_pp(self): ref = df.FFTDF(cell_nonorth).get_pp() out = multigrid.MultiGridFFTDF(cell_nonorth).get_pp() self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 8) + # test small memory + mydf = multigrid.MultiGridFFTDF(cell_nonorth) + mydf.max_memory = 10 + out = mydf.get_pp(max_memory=2) + self.assertAlmostEqual(abs(ref-out).max(), 0, 8) + def test_orth_get_nuc_kpts(self): ref = df.FFTDF(cell_orth).get_nuc(kpts) out = multigrid.MultiGridFFTDF(cell_orth).get_nuc(kpts) @@ -133,7 +145,7 @@ def test_multigrid_kuks(self): mf = dft.KUKS(cell_he) mf.xc = 'lda,' ref = mf.get_veff(cell_he, numpy.array((dm_he,dm_he)), kpts=kpts) - out = multigrid.multigrid(mf).get_veff(cell_he, (dm_he,dm_he), kpts=kpts) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, (dm_he,dm_he), kpts=kpts) self.assertEqual(out.shape, 
ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 8) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 8) @@ -143,7 +155,7 @@ def test_multigrid_krks(self): mf = dft.KRKS(cell_he) mf.xc = 'lda,' ref = mf.get_veff(cell_he, dm_he, kpts=kpts) - out = multigrid.multigrid(mf).get_veff(cell_he, dm_he, kpts=kpts) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, dm_he, kpts=kpts) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 8) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 8) @@ -159,7 +171,7 @@ def test_multigrid_kroks(self): dm1 = lib.tag_array(numpy.array([dm1,dm1]), mo_coeff=mo, mo_occ=mo_occ*2) ref = mf.get_veff(cell_he, dm1, kpts=kpts) - out = multigrid.multigrid(mf).get_veff(cell_he, dm1, kpts=kpts) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, dm1, kpts=kpts) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 7) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 7) @@ -169,7 +181,7 @@ def test_multigrid_uks(self): mf = dft.UKS(cell_he) mf.xc = 'lda,' ref = mf.get_veff(cell_he, numpy.array((dm_he[0],dm_he[0]))) - out = multigrid.multigrid(mf).get_veff(cell_he, (dm_he[0], dm_he[0])) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, (dm_he[0], dm_he[0])) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 7) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 7) @@ -179,7 +191,7 @@ def test_multigrid_rks(self): mf = dft.RKS(cell_he) mf.xc = 'lda,' ref = mf.get_veff(cell_he, dm_he[0]) - out = multigrid.multigrid(mf).get_veff(cell_he, dm_he[0]) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, dm_he[0]) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 7) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 7) @@ -195,7 +207,7 @@ def test_multigrid_roks(self): dm1 = lib.tag_array(numpy.array([dm1,dm1]), mo_coeff=mo, mo_occ=mo_occ*2) ref = mf.get_veff(cell_he, dm1) - out = 
multigrid.multigrid(mf).get_veff(cell_he, dm1) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, dm1) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 7) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 7) @@ -218,8 +230,8 @@ def test_eval_rhoG_orth_kpts(self): numpy.random.seed(9) dm = numpy.random.random(dm1.shape) + numpy.random.random(dm1.shape) * 1j mydf = multigrid.MultiGridFFTDF(cell_orth) - rhoG = multigrid._eval_rhoG(mydf, dm, hermi=0, kpts=kpts, deriv=0, - rhog_high_order=True) + rhoG = multigrid.multigrid._eval_rhoG(mydf, dm, hermi=0, kpts=kpts, deriv=0, + rhog_high_order=True) self.assertTrue(rhoG.dtype == numpy.complex128) mydf = df.FFTDF(cell_orth) @@ -232,8 +244,8 @@ def test_eval_rhoG_orth_kpts(self): def test_eval_rhoG_orth_gga(self): mydf = multigrid.MultiGridFFTDF(cell_orth) - rhoG = multigrid._eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=1, - rhog_high_order=True) + rhoG = multigrid.multigrid._eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=1, + rhog_high_order=True) mydf = df.FFTDF(cell_orth) ni = dft.numint.KNumInt() @@ -245,8 +257,8 @@ def test_eval_rhoG_orth_gga(self): def test_eval_rhoG_nonorth_gga(self): mydf = multigrid.MultiGridFFTDF(cell_nonorth) - rhoG = multigrid._eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=1, - rhog_high_order=True) + rhoG = multigrid.multigrid._eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=1, + rhog_high_order=True) mydf = df.FFTDF(cell_nonorth) ni = dft.numint.KNumInt() @@ -273,7 +285,7 @@ def test_gen_rhf_response(self): hermi=1, kpts=kpts) vj = mydf.get_jk(dm1, with_k=False, kpts=kpts)[0] ref += vj - v = multigrid._gen_rhf_response(mf, dm_he, hermi=1)(dm1) + v = multigrid.multigrid._gen_rhf_response(mf, dm_he, hermi=1)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 8) @@ -282,7 +294,7 @@ def test_gen_rhf_response(self): ref = dft.numint.nr_rks_fxc(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, 
hermi=1, kpts=kpts) ref += vj - v = multigrid._gen_rhf_response(mf, dm_he, hermi=1)(dm1) + v = multigrid.multigrid._gen_rhf_response(mf, dm_he, hermi=1)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 6) @@ -356,7 +368,7 @@ def test_nr_rks_fxc_st(self): mf.xc = 'b88,' ref = dft.numint.nr_rks_fxc_st(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, singlet=True, kpts=kpts) - v = multigrid._gen_rhf_response(mf, dm_he, singlet=True)(dm1) + v = multigrid.multigrid._gen_rhf_response(mf, dm_he, singlet=True)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 5) @@ -364,7 +376,7 @@ def test_nr_rks_fxc_st(self): mf.xc = 'lda,' ref = dft.numint.nr_rks_fxc_st(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, singlet=False, kpts=kpts) - v = multigrid._gen_rhf_response(mf, dm_he, singlet=False)(dm1) + v = multigrid.multigrid._gen_rhf_response(mf, dm_he, singlet=False)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 4) @@ -391,7 +403,7 @@ def test_gen_uhf_response(self): ref = dft.numint.nr_uks_fxc(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, hermi=1) vj = mydf.get_jk(dm1, with_k=False)[0] ref += vj[0] + vj[1] - v = multigrid._gen_uhf_response(mf, dm_he, with_j=True, hermi=1)(dm1) + v = multigrid.multigrid._gen_uhf_response(mf, dm_he, with_j=True, hermi=1)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 7) @@ -399,7 +411,7 @@ def test_gen_uhf_response(self): mf.xc = 'b88,' ref = dft.numint.nr_uks_fxc(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, hermi=1) ref += vj[0] + vj[1] - v = multigrid._gen_uhf_response(mf, dm_he, with_j=True, hermi=1)(dm1) + v = multigrid.multigrid._gen_uhf_response(mf, dm_he, with_j=True, hermi=1)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) 
self.assertAlmostEqual(abs(v-ref).max(), 0, 7) @@ -454,11 +466,11 @@ def test_orth_uks_fxc_hermi0(self): def test_rcut_vs_ke_cut(self): xc = 'lda,' - with lib.temporary_env(multigrid, TASKS_TYPE='rcut'): + with lib.temporary_env(multigrid.multigrid, TASKS_TYPE='rcut'): mg_df = multigrid.MultiGridFFTDF(cell_orth) n1, exc1, v1 = multigrid.nr_rks(mg_df, xc, dm1, kpts=kpts) self.assertEqual(len(mg_df.tasks), 3) - with lib.temporary_env(multigrid, TASKS_TYPE='ke_cut'): + with lib.temporary_env(multigrid.multigrid, TASKS_TYPE='ke_cut'): mg_df = multigrid.MultiGridFFTDF(cell_orth) n2, exc2, v2 = multigrid.nr_rks(mg_df, xc, dm1, kpts=kpts) self.assertEqual(len(mg_df.tasks), 6) diff --git a/pyscf/pbc/dft/test/test_multigrid2.py b/pyscf/pbc/dft/test/test_multigrid2.py new file mode 100644 index 0000000000..f23c687a48 --- /dev/null +++ b/pyscf/pbc/dft/test/test_multigrid2.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Author: Xing Zhang +# + +import unittest +import numpy +from pyscf.pbc import gto, dft +from pyscf.pbc.dft import multigrid +from pyscf.pbc.grad import rks as rks_grad +from pyscf.pbc.grad import uks as uks_grad +from pyscf.pbc.grad import krks as krks_grad + +def setUpModule(): + global cell + cell = gto.Cell() + boxlen = 5.0 + cell.a = numpy.array([[boxlen,0.0,0.0], + [0.0,boxlen,0.0], + [0.0,0.0,boxlen]]) + cell.atom = """ + O 1.84560 1.21649 1.10372 + H 2.30941 1.30070 1.92953 + H 0.91429 1.26674 1.28886 + """ + cell.basis = 'gth-szv' + cell.ke_cutoff = 200 + cell.pseudo = 'gth-pade' + cell.verbose = 0 + cell.use_loose_rcut = True + cell.build() + +def tearDownModule(): + global cell + del cell + +def _fftdf_energy_grad(cell, xc): + mf = dft.KRKS(cell, kpts=numpy.zeros((1,3))) + mf.xc = xc + e = mf.kernel() + grad = krks_grad.Gradients(mf) + g = grad.kernel() + return e, g + +def _multigrid2_energy_grad(cell, xc, spin=0): + if spin == 0: + mf = dft.RKS(cell) + elif spin == 1: + mf = dft.UKS(cell) + mf.xc = xc + mf.with_df = multigrid.MultiGridFFTDF2(cell) + e = mf.kernel() + if spin == 0: + g = rks_grad.Gradients(mf).kernel() + elif spin == 1: + g = uks_grad.Gradients(mf).kernel() + return e, g + +class KnownValues(unittest.TestCase): + def test_orth_lda(self): + xc = 'lda, vwn' + e0, g0 = _fftdf_energy_grad(cell, xc) + e, g = _multigrid2_energy_grad(cell, xc, 0) + e1, g1 = _multigrid2_energy_grad(cell, xc, 1) + assert abs(e-e0) < 1e-8 + assert abs(e1-e0) < 1e-8 + assert abs(g-g0).max() < 2e-5 + assert abs(g1-g0).max() < 2e-5 + + def test_orth_gga(self): + xc = 'pbe, pbe' + e0, g0 = _fftdf_energy_grad(cell, xc) + e, g = _multigrid2_energy_grad(cell, xc, 0) + e1, g1 = _multigrid2_energy_grad(cell, xc, 1) + assert abs(e-e0) < 1e-6 + assert abs(e1-e0) < 1e-6 + assert abs(g-g0).max() < 1e-4 + assert abs(g1-g0).max() < 1e-4 + +if __name__ == '__main__': + print("Full Tests for multigrid2") + unittest.main() diff --git a/pyscf/pbc/dft/uks.py 
b/pyscf/pbc/dft/uks.py index de72d6452d..20d8d14c71 100644 --- a/pyscf/pbc/dft/uks.py +++ b/pyscf/pbc/dft/uks.py @@ -57,7 +57,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_uks(ks.with_df, ks.xc, dm, hermi, kpt.reshape(1,3), kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -86,7 +86,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, 0, hermi, kpt, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) if not hybrid: diff --git a/pyscf/pbc/grad/__init__.py b/pyscf/pbc/grad/__init__.py index 5408a1eb50..e308bca1e1 100644 --- a/pyscf/pbc/grad/__init__.py +++ b/pyscf/pbc/grad/__init__.py @@ -19,7 +19,10 @@ ''' Analytical nuclear gradients for PBC ''' - +from pyscf.pbc.grad import rhf +from pyscf.pbc.grad import rks +from pyscf.pbc.grad import uhf +from pyscf.pbc.grad import uks from pyscf.pbc.grad import krhf from pyscf.pbc.grad import kuhf from pyscf.pbc.grad import krks @@ -30,4 +33,4 @@ from pyscf.pbc.grad.krks import Gradients as KRKS from pyscf.pbc.grad.kuks import Gradients as KUKS -grad_nuc = krhf.grad_nuc +grad_nuc = rhf.grad_nuc diff --git a/pyscf/pbc/grad/krhf.py b/pyscf/pbc/grad/krhf.py index 9fd628882f..0dd6a171e4 100644 --- a/pyscf/pbc/grad/krhf.py +++ b/pyscf/pbc/grad/krhf.py @@ -211,6 +211,10 @@ def hcore_deriv(atm_id): def grad_nuc(cell, atmlst): ''' Derivatives of nuclear repulsion energy wrt nuclear coordinates + + Notes: + An optimized version of this function is available in + `pbc.gto.ewald_methods.ewald_nuc_grad` ''' chargs = cell.atom_charges() ew_eta, ew_cut = cell.get_ewald_params() @@ -244,12 +248,14 @@ def grad_nuc(cell, atmlst): absG2[absG2==0] = 1e200 ewg_grad = 
np.zeros([natom,3]) SI = cell.get_SI(Gv) - if cell.low_dim_ft_type is None or cell.dimension == 3: + if cell.dimension != 2 or cell.low_dim_ft_type == 'inf_vacuum': coulG = 4*np.pi / absG2 coulG *= weights ZSI = np.einsum("i,ij->j", chargs, SI) ZexpG2 = coulG * np.exp(-absG2/(4*ew_eta**2)) ZexpG2_mod = ZexpG2.reshape(len(ZexpG2),1) * Gv + else: + raise NotImplementedError for i, qi in enumerate(chargs): Zfac = np.imag(ZSI * SI[i].conj()) * qi ewg_grad[i] = - np.sum(Zfac.reshape((len(Zfac),1)) * ZexpG2_mod, axis = 0) diff --git a/pyscf/pbc/grad/rhf.py b/pyscf/pbc/grad/rhf.py new file mode 100644 index 0000000000..720451b719 --- /dev/null +++ b/pyscf/pbc/grad/rhf.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Author: Xing Zhang +# + +import ctypes +import numpy as np +from pyscf import __config__ +from pyscf import lib +from pyscf.lib import logger +from pyscf.grad import rhf as mol_rhf +from pyscf.grad.rhf import _write +from pyscf.pbc.gto.pseudo import pp_int +from pyscf.pbc.lib.kpts_helper import gamma_point + +SCREEN_VHF_DM_CONTRA = getattr(__config__, 'pbc_rhf_grad_screen_vhf_dm_contract', True) +libpbc = lib.load_library('libpbc') + +def grad_elec(mf_grad, mo_energy=None, mo_coeff=None, mo_occ=None, + atmlst=None, kpt=np.zeros(3)): + mf = mf_grad.base + mol = mf_grad.mol + if mo_energy is None: mo_energy = mf.mo_energy + if mo_occ is None: mo_occ = mf.mo_occ + if mo_coeff is None: mo_coeff = mf.mo_coeff + log = logger.Logger(mf_grad.stdout, mf_grad.verbose) + + s1 = mf_grad.get_ovlp(mol, kpt) + dm0 = mf.make_rdm1(mo_coeff, mo_occ) + + t0 = (logger.process_clock(), logger.perf_counter()) + log.debug('Computing Gradients of NR-HF Coulomb repulsion') + vhf = mf_grad.get_veff(mol, dm0, kpt) + log.timer('gradients of 2e part', *t0) + + dme0 = mf_grad.make_rdm1e(mo_energy, mo_coeff, mo_occ) + + if atmlst is None: + atmlst = range(mol.natm) + + de = 0 + if gamma_point(kpt): + de = mf.with_df.vpploc_part1_nuc_grad(dm0, kpts=kpt.reshape(-1,3)) + de += pp_int.vpploc_part2_nuc_grad(mol, dm0) + de += pp_int.vppnl_nuc_grad(mol, dm0) + h1ao = -mol.pbc_intor('int1e_ipkin', kpt=kpt) + if getattr(mf.with_df, 'vpplocG_part1', None) is None: + h1ao += -mf.with_df.get_vpploc_part1_ip1(kpts=kpt.reshape(-1,3)) + de += _contract_vhf_dm(mf_grad, np.add(h1ao, vhf), dm0) * 2 + de += _contract_vhf_dm(mf_grad, s1, dme0) * -2 + h1ao = s1 = vhf = dm0 = dme0 = None + de = de[atmlst] + else: + raise NotImplementedError + + for k, ia in enumerate(atmlst): + de[k] += mf_grad.extra_force(ia, locals()) + + if log.verbose >= logger.DEBUG: + log.debug('gradients of electronic part') + _write(log, mol, de, atmlst) + return de + + +def _contract_vhf_dm(mf_grad, vhf, dm, comp=3, atmlst=None, + 
screen=SCREEN_VHF_DM_CONTRA): + from pyscf.gto.mole import ao_loc_nr, ATOM_OF + from pyscf.pbc.gto import build_neighbor_list_for_shlpairs, free_neighbor_list + + t0 = (logger.process_clock(), logger.perf_counter()) + + mol = mf_grad.mol + natm = mol.natm + nbas = mol.nbas + shls_slice = np.asarray([0,nbas,0,nbas], order="C", dtype=np.int32) + ao_loc = np.asarray(ao_loc_nr(mol), order="C", dtype=np.int32) + shls_atm = np.asarray(mol._bas[:,ATOM_OF].copy(), order="C", dtype=np.int32) + + de = np.zeros((natm,comp), order="C") + vhf = np.asarray(vhf, order="C") + dm = np.asarray(dm, order="C") + + if screen: + neighbor_list = build_neighbor_list_for_shlpairs(mol) + else: + neighbor_list = lib.c_null_ptr() + func = getattr(libpbc, "contract_vhf_dm", None) + try: + func(de.ctypes.data_as(ctypes.c_void_p), + vhf.ctypes.data_as(ctypes.c_void_p), + dm.ctypes.data_as(ctypes.c_void_p), + ctypes.byref(neighbor_list), + shls_slice.ctypes.data_as(ctypes.c_void_p), + ao_loc.ctypes.data_as(ctypes.c_void_p), + shls_atm.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(comp), ctypes.c_int(natm), + ctypes.c_int(nbas)) + except RuntimeError: + raise + free_neighbor_list(neighbor_list) + + if atmlst is not None: + de = de[atmlst] + + logger.timer(mf_grad, '_contract_vhf_dm', *t0) + return de + + +def get_ovlp(cell, kpt=np.zeros(3)): + return -cell.pbc_intor('int1e_ipovlp', kpt=kpt) + + +def get_veff(mf_grad, mol, dm, kpt=np.zeros(3)): + mf = mf_grad.base + mydf = mf.with_df + xc_code = getattr(mf, 'xc', None) + kpts = kpt.reshape(-1,3) + return -mydf.get_veff_ip1(dm, xc_code=xc_code, kpts=kpts) + + +def grad_nuc(cell, atmlst=None, ew_eta=None, ew_cut=None): + from pyscf.pbc.gto import ewald_methods + + t0 = (logger.process_clock(), logger.perf_counter()) + + grad = ewald_methods.ewald_nuc_grad(cell, ew_eta, ew_cut) + if atmlst is not None: + grad = grad[atmlst] + + logger.timer(cell, 'nuclear gradient', *t0) + return grad + + +class GradientsBase(mol_rhf.GradientsBase): + '''Base class 
for Gamma-point nuclear gradient''' + def grad_nuc(self, mol=None, atmlst=None): + if mol is None: mol = self.mol + return grad_nuc(mol, atmlst) + + def get_ovlp(self, mol=None, kpt=np.zeros(3)): + if mol is None: + mol = self.mol + return get_ovlp(mol, kpt) + + +class Gradients(GradientsBase): + '''Non-relativistic Gamma-point restricted Hartree-Fock gradients''' + def get_veff(self, mol=None, dm=None, kpt=np.zeros(3)): + if mol is None: mol = self.mol + if dm is None: dm = self.base.make_rdm1() + return get_veff(self, mol, dm, kpt) + + make_rdm1e = mol_rhf.Gradients.make_rdm1e + grad_elec = grad_elec diff --git a/pyscf/pbc/grad/rks.py b/pyscf/pbc/grad/rks.py new file mode 100644 index 0000000000..1429050002 --- /dev/null +++ b/pyscf/pbc/grad/rks.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Xing Zhang +# + +from pyscf.pbc.grad import rhf + + +class Gradients(rhf.Gradients): + '''Non-relativistic Gamma-point restricted Kohn-Sham DFT gradients''' + pass diff --git a/pyscf/pbc/grad/uhf.py b/pyscf/pbc/grad/uhf.py new file mode 100644 index 0000000000..fd71aa0920 --- /dev/null +++ b/pyscf/pbc/grad/uhf.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Xing Zhang +# + +import ctypes +import numpy as np +from pyscf import __config__ +from pyscf.lib import logger +from pyscf.grad import uhf as mol_uhf +from pyscf.grad.rhf import _write +from pyscf.pbc.gto.pseudo import pp_int +from pyscf.pbc.grad import rhf as rhf_grad +from pyscf.pbc.lib.kpts_helper import gamma_point + +def grad_elec(mf_grad, mo_energy=None, mo_coeff=None, mo_occ=None, atmlst=None, kpt=np.zeros(3)): + mf = mf_grad.base + mol = mf_grad.mol + if mo_energy is None: mo_energy = mf.mo_energy + if mo_occ is None: mo_occ = mf.mo_occ + if mo_coeff is None: mo_coeff = mf.mo_coeff + log = logger.Logger(mf_grad.stdout, mf_grad.verbose) + + s1 = mf_grad.get_ovlp(mol, kpt) + dm0 = mf.make_rdm1(mo_coeff, mo_occ) + + t0 = (logger.process_clock(), logger.perf_counter()) + log.debug('Computing Gradients of NR-HF Coulomb repulsion') + vhf = mf_grad.get_veff(mol, dm0, kpt) + log.timer('gradients of 2e part', *t0) + + dme0 = mf_grad.make_rdm1e(mo_energy, mo_coeff, mo_occ) + dm0_sf = dm0[0] + dm0[1] + dme0_sf = dme0[0] + dme0[1] + + if atmlst is None: + atmlst = range(mol.natm) + + de = 0 + if gamma_point(kpt): + de = mf.with_df.vpploc_part1_nuc_grad(dm0_sf, kpts=kpt.reshape(-1,3)) + de += pp_int.vpploc_part2_nuc_grad(mol, dm0_sf) + de += pp_int.vppnl_nuc_grad(mol, dm0_sf) + h1ao = -mol.pbc_intor('int1e_ipkin', kpt=kpt) + if getattr(mf.with_df, 'vpplocG_part1', None) is None: + h1ao += -mf.with_df.get_vpploc_part1_ip1(kpts=kpt.reshape(-1,3)) + de += rhf_grad._contract_vhf_dm(mf_grad, h1ao, dm0_sf) * 2 + for s in range(2): + de += 
rhf_grad._contract_vhf_dm(mf_grad, vhf[s], dm0[s]) * 2 + de += rhf_grad._contract_vhf_dm(mf_grad, s1, dme0_sf) * -2 + h1ao = s1 = vhf = dm0 = dme0 = dm0_sf = dme0_sf = None + de = de[atmlst] + else: + raise NotImplementedError + + for k, ia in enumerate(atmlst): + de[k] += mf_grad.extra_force(ia, locals()) + + if log.verbose >= logger.DEBUG: + log.debug('gradients of electronic part') + _write(log, mol, de, atmlst) + return de + +def get_veff(mf_grad, mol, dm, kpt=np.zeros(3)): + mf = mf_grad.base + mydf = mf.with_df + xc_code = getattr(mf, 'xc', None) + kpts = kpt.reshape(-1,3) + return -mydf.get_veff_ip1(dm, xc_code=xc_code, kpts=kpts, spin=1) + +class Gradients(rhf_grad.GradientsBase): + '''Non-relativistic Gamma-point restricted Hartree-Fock gradients''' + def get_veff(self, mol=None, dm=None, kpt=np.zeros(3)): + if mol is None: mol = self.mol + if dm is None: dm = self.base.make_rdm1() + return get_veff(self, mol, dm, kpt) + + make_rdm1e = mol_uhf.Gradients.make_rdm1e + grad_elec = grad_elec diff --git a/pyscf/pbc/grad/uks.py b/pyscf/pbc/grad/uks.py new file mode 100644 index 0000000000..4a6ce67c1a --- /dev/null +++ b/pyscf/pbc/grad/uks.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Author: Xing Zhang +# + +from pyscf.pbc.grad import uhf + + +class Gradients(uhf.Gradients): + '''Non-relativistic Gamma-point unrestricted Kohn-Sham DFT gradients''' + pass diff --git a/pyscf/pbc/gto/__init__.py b/pyscf/pbc/gto/__init__.py index dcaaddebbc..769b76c616 100644 --- a/pyscf/pbc/gto/__init__.py +++ b/pyscf/pbc/gto/__init__.py @@ -22,6 +22,7 @@ from pyscf.pbc.gto.basis import parse, load, parse_ecp, load_ecp from pyscf.pbc.gto import pseudo from pyscf.pbc.gto.cell import * +from pyscf.pbc.gto.neighborlist import * parse_pp = parsepp = pseudo.parse load_pp = loadpp = pseudo.load diff --git a/pyscf/pbc/gto/_pbcintor.py b/pyscf/pbc/gto/_pbcintor.py index f721eb0304..c5b921b2e0 100644 --- a/pyscf/pbc/gto/_pbcintor.py +++ b/pyscf/pbc/gto/_pbcintor.py @@ -33,15 +33,21 @@ def __init__(self, cell): def init_rcut_cond(self, cell, precision=None): if precision is None: precision = cell.precision - rcut = numpy.array([cell.bas_rcut(ib, precision) - for ib in range(cell.nbas)]) + if cell.use_loose_rcut: + rcut = cell.rcut_by_shells(precision) + fn_set_rcut_cond = getattr(libpbc, 'PBCset_rcut_cond_loose') + else: + rcut = numpy.array([cell.bas_rcut(ib, precision) + for ib in range(cell.nbas)]) + fn_set_rcut_cond = getattr(libpbc, 'PBCset_rcut_cond') + natm = ctypes.c_int(cell._atm.shape[0]) nbas = ctypes.c_int(cell._bas.shape[0]) - libpbc.PBCset_rcut_cond(self._this, - rcut.ctypes.data_as(ctypes.c_void_p), - cell._atm.ctypes.data_as(ctypes.c_void_p), natm, - cell._bas.ctypes.data_as(ctypes.c_void_p), nbas, - cell._env.ctypes.data_as(ctypes.c_void_p)) + fn_set_rcut_cond(self._this, + rcut.ctypes.data_as(ctypes.c_void_p), + cell._atm.ctypes.data_as(ctypes.c_void_p), natm, + cell._bas.ctypes.data_as(ctypes.c_void_p), nbas, + cell._env.ctypes.data_as(ctypes.c_void_p)) return self def del_rcut_cond(self): @@ -56,4 +62,5 @@ def __del__(self): class _CPBCOpt(ctypes.Structure): _fields_ = [('rrcut', ctypes.c_void_p), + ('rcut', ctypes.c_void_p), ('fprescreen', 
ctypes.c_void_p)] diff --git a/pyscf/pbc/gto/cell.py b/pyscf/pbc/gto/cell.py index 87282fbfd4..872fda36e2 100644 --- a/pyscf/pbc/gto/cell.py +++ b/pyscf/pbc/gto/cell.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -41,6 +41,9 @@ WITH_GAMMA = getattr(__config__, 'pbc_gto_cell_make_kpts_with_gamma', True) EXP_DELIMITER = getattr(__config__, 'pbc_gto_cell_split_basis_exp_delimiter', [1.0, 0.5, 0.25, 0.1, 0]) +# defined in lib/pbc/cell.h +RCUT_EPS = 1e-3 +RCUT_MAX_CYCLE = 10 libpbc = _pbcintor.libpbc @@ -281,6 +284,89 @@ def intor_cross(intor, cell1, cell2, comp=None, hermi=0, kpts=None, kpt=None, mat = mat[0] return mat +def _intor_cross_screened( + intor, cell1, cell2, comp=None, hermi=0, kpts=None, kpt=None, + shls_slice=None, **kwargs): + '''`intor_cross` with prescreening. + + Notes: + This function may be subject to change. 
+ ''' + from pyscf.pbc.gto.neighborlist import NeighborListOpt + intor, comp = moleintor._get_intor_and_comp(cell1._add_suffix(intor), comp) + + if kpts is None: + if kpt is not None: + kpts_lst = np.reshape(kpt, (1,3)) + else: + kpts_lst = np.zeros((1,3)) + else: + kpts_lst = np.reshape(kpts, (-1,3)) + nkpts = len(kpts_lst) + + pcell = cell1.copy(deep=False) + pcell.precision = min(cell1.precision, cell2.precision) + pcell._atm, pcell._bas, pcell._env = \ + atm, bas, env = conc_env(cell1._atm, cell1._bas, cell1._env, + cell2._atm, cell2._bas, cell2._env) + if shls_slice is None: + shls_slice = (0, cell1.nbas, 0, cell2.nbas) + i0, i1, j0, j1 = shls_slice[:4] + j0 += cell1.nbas + j1 += cell1.nbas + ao_loc = moleintor.make_loc(bas, intor) + ni = ao_loc[i1] - ao_loc[i0] + nj = ao_loc[j1] - ao_loc[j0] + out = np.empty((nkpts,comp,ni,nj), dtype=np.complex128) + + if hermi == 0: + aosym = 's1' + else: + aosym = 's2' + fill = getattr(libpbc, 'PBCnr2c_screened_fill_k'+aosym) + fintor = getattr(moleintor.libcgto, intor) + drv = libpbc.PBCnr2c_screened_drv + + rcut = max(cell1.rcut, cell2.rcut) + Ls = cell1.get_lattice_Ls(rcut=rcut) + expkL = np.asarray(np.exp(1j*np.dot(kpts_lst, Ls.T)), order='C') + + neighbor_list = kwargs.get('neighbor_list', None) + if neighbor_list is None: + nlopt = NeighborListOpt(cell1) + nlopt.build(cell1, cell2, Ls, set_optimizer=False) + neighbor_list = nlopt.nl + + cintopt = lib.c_null_ptr() + + drv(fintor, fill, out.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nkpts), ctypes.c_int(comp), ctypes.c_int(len(Ls)), + Ls.ctypes.data_as(ctypes.c_void_p), + expkL.ctypes.data_as(ctypes.c_void_p), + (ctypes.c_int*4)(i0, i1, j0, j1), + ao_loc.ctypes.data_as(ctypes.c_void_p), cintopt, + atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(pcell.natm), + bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(pcell.nbas), + env.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(env.size), + ctypes.byref(neighbor_list)) + + nlopt = None + + mat = [] + for k, kpt in 
enumerate(kpts_lst): + v = out[k] + if hermi != 0: + for ic in range(comp): + lib.hermi_triu(v[ic], hermi=hermi, inplace=True) + if comp == 1: + v = v[0] + if abs(kpt).sum() < 1e-9: # gamma_point + v = v.real + mat.append(v) + + if kpts is None or np.shape(kpts) == (3,): # A single k-point + mat = mat[0] + return mat def get_nimgs(cell, precision=None): r'''Choose number of basis function images in lattice sums @@ -339,6 +425,9 @@ def estimate_rcut(cell, precision=None): return 0.01 if precision is None: precision = cell.precision + if cell.use_loose_rcut: + return cell.rcut_by_shells(precision).max() + exps, cs = _extract_pgto_params(cell, 'min') ls = cell._bas[:,mole.ANG_OF] rcut = _estimate_rcut(exps, ls, cs, precision) @@ -491,7 +580,24 @@ def get_Gv_weights(cell, mesh=None, **kwargs): weights = np.einsum('i,k->ik', wxy, wz).reshape(-1) Gvbase = (rx, ry, rz) - Gv = np.dot(lib.cartesian_prod(Gvbase), b) + + #:Gv = np.dot(lib.cartesian_prod(Gvbase), b) + # NOTE mesh can be different from the input mesh + mesh = np.asarray((len(rx),len(ry),len(rz)), dtype=np.int32) + Gv = np.empty((*mesh,3), order='C', dtype=float) + b = np.asarray(b, order='C') + rx = np.asarray(rx, order='C') + ry = np.asarray(ry, order='C') + rz = np.asarray(rz, order='C') + fn = libpbc.get_Gv + fn(Gv.ctypes.data_as(ctypes.c_void_p), + rx.ctypes.data_as(ctypes.c_void_p), + ry.ctypes.data_as(ctypes.c_void_p), + rz.ctypes.data_as(ctypes.c_void_p), + mesh.ctypes.data_as(ctypes.c_void_p), + b.ctypes.data_as(ctypes.c_void_p)) + Gv = Gv.reshape(-1, 3) + # 1/cell.vol == det(b)/(2pi)^3 weights *= 1/(2*np.pi)**3 return Gv, Gvbase, weights @@ -504,7 +610,7 @@ def _non_uniform_Gv_base(n): #return np.hstack((0,rs,-rs[::-1])), np.hstack((0,ws,ws[::-1])) return np.hstack((rs,-rs[::-1])), np.hstack((ws,ws[::-1])) -def get_SI(cell, Gv=None, mesh=None): +def get_SI(cell, Gv=None, mesh=None, atmlst=None): '''Calculate the structure factor (0D, 1D, 2D, 3D) for all atoms; see MH (3.34). 
Args: @@ -513,11 +619,16 @@ def get_SI(cell, Gv=None, mesh=None): Gv : (N,3) array G vectors + atmlst : list of ints, optional + Indices of atoms for which the structure factors are computed. + Returns: SI : (natm, ngrids) ndarray, dtype=np.complex128 The structure factor for each atom at each G-vector. ''' coords = cell.atom_coords() + if atmlst is not None: + coords = coords[np.asarray(atmlst)] if Gv is None: if mesh is None: mesh = cell.mesh @@ -598,6 +709,10 @@ def ewald(cell, ew_eta=None, ew_cut=None): if cell.natm == 0: return 0 + if cell.dimension == 3 and cell.use_particle_mesh_ewald: + from pyscf.pbc.gto import ewald_methods + return ewald_methods.particle_mesh_ewald(cell, ew_eta, ew_cut) + chargs = cell.atom_charges() if ew_eta is None or ew_cut is None: @@ -639,7 +754,16 @@ def ewald(cell, ew_eta=None, ew_cut=None): # have relatively large error coulG = 4*np.pi / absG2 coulG *= weights - ZSI = np.einsum("i,ij->j", chargs, cell.get_SI(Gv)) + + #:ZSI = np.einsum('i,ij->j', chargs, cell.get_SI(Gv)) + ngrids = len(Gv) + ZSI = np.empty((ngrids,), dtype=np.complex128) + mem_avail = cell.max_memory - lib.current_memory()[0] + blksize = int((mem_avail*1e6 - cell.natm*24)/((3+cell.natm*2)*8)) + blksize = min(ngrids, max(mesh[2], blksize)) + for ig0, ig1 in lib.prange(0, ngrids, blksize): + np.einsum('i,ij->j', chargs, cell.get_SI(Gv[ig0:ig1]), out=ZSI[ig0:ig1]) + ZexpG2 = ZSI * np.exp(-absG2/(4*ew_eta**2)) ewg = .5 * np.einsum('i,i,i', ZSI.conj(), ZexpG2, coulG).real @@ -835,6 +959,59 @@ def _mesh_inf_vaccum(cell): # meshz has to be even number due to the symmetry on z+ and z- return int(meshz*.5 + .999) * 2 +def pgf_rcut(l, alpha, coeff, precision=INTEGRAL_PRECISION, + rcut=0, max_cycle=RCUT_MAX_CYCLE, eps=RCUT_EPS): + '''Estimate the cutoff radii of primitive Gaussian functions + based on their values in real space: + `c*rcut^(l+2)*exp(-alpha*rcut^2) ~ precision`. 
+ ''' + c = np.log(coeff / precision) + + rmin = np.sqrt(.5 * (l+2) / alpha) * 2 + eps = np.minimum(rmin/10, eps) + rcut = np.maximum(rcut, rmin+eps) + for i in range(max_cycle): + rcut_last = rcut + rcut = np.sqrt(((l+2) * np.log(rcut) + c) / alpha) + if np.all(abs(rcut - rcut_last) < eps): + return rcut + warnings.warn(f'cell.pgf_rcut failed to converge in {max_cycle} cycles.') + return rcut + +def rcut_by_shells(cell, precision=None, rcut=0, + return_pgf_radius=False): + '''Compute shell and primitive gaussian function radii. + ''' + # TODO the internal implementation loops over all shells, + # which can be optimized to loop over atom types. + if precision is None: + precision = cell.precision + + bas = np.asarray(cell._bas, order='C') + env = np.asarray(cell._env, order='C') + nbas = len(bas) + shell_radius = np.empty((nbas,), order='C', dtype=float) + if return_pgf_radius: + nprim = bas[:,mole.NPRIM_OF].max() + # be careful that the unused memory blocks are not initialized + pgf_radius = np.empty((nbas,nprim), order='C', dtype=np.double) + ptr_pgf_radius = lib.ndarray_pointer_2d(pgf_radius) + else: + ptr_pgf_radius = lib.c_null_ptr() + fn = getattr(libpbc, 'rcut_by_shells', None) + try: + fn(shell_radius.ctypes.data_as(ctypes.c_void_p), + ptr_pgf_radius, + bas.ctypes.data_as(ctypes.c_void_p), + env.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nbas), ctypes.c_double(rcut), + ctypes.c_double(precision)) + except Exception as e: + raise RuntimeError(f'Failed to get shell radii.\n{e}') + if return_pgf_radius: + return shell_radius, pgf_radius + return shell_radius + class Cell(mole.MoleBase): '''A Cell object holds the basic information of a crystal. @@ -864,6 +1041,14 @@ class Cell(mole.MoleBase): infinity vacuum (inf_vacuum) or truncated Coulomb potential (analytic_2d_1). Unless explicitly specified, analytic_2d_1 is used for 2D system and inf_vacuum is assumed for 1D and 0D. 
+ use_loose_rcut : bool + If set to True, a loose `rcut` determined by shell radius is used, + which is usually accurate enough for pure DFT calculations; + otherwise, a tight `rcut` determined by overlap integral is used. + Default value is False. Has no effect if `rcut` is set manually. + use_particle_mesh_ewald : bool + If set to True, use particle-mesh Ewald to compute the nuclear repulsion. + Default value is False, meaning to use classical Ewald summation. space_group_symmetry : bool Whether to consider space group symmetry. Default is False. symmorphic : bool @@ -892,6 +1077,7 @@ class Cell(mole.MoleBase): 'precision', 'exp_to_discard', 'a', 'ke_cutoff', 'pseudo', 'dimension', 'low_dim_ft_type', 'space_group_symmetry', 'symmorphic', 'lattice_symmetry', 'mesh', 'rcut', + 'use_loose_rcut', 'use_particle_mesh_ewald', } def __init__(self, **kwargs): @@ -906,6 +1092,8 @@ def __init__(self, **kwargs): # density-fitting class. This determines how the ewald produces # its energy. self.low_dim_ft_type = None + self.use_loose_rcut = False + self.use_particle_mesh_ewald = False self.space_group_symmetry = False self.symmorphic = False self.lattice_symmetry = None @@ -1082,7 +1270,9 @@ def build_lattice_symmetry(self, check_mesh_symmetry=True): def build(self, dump_input=True, parse_arg=mole.ARGPARSE, a=None, mesh=None, ke_cutoff=None, precision=None, nimgs=None, h=None, dimension=None, rcut= None, low_dim_ft_type=None, - space_group_symmetry=None, symmorphic=None, *args, **kwargs): + space_group_symmetry=None, symmorphic=None, + use_loose_rcut=None, use_particle_mesh_ewald=None, + *args, **kwargs): '''Setup Mole molecule and Cell and initialize some control parameters. Whenever you change the value of the attributes of :class:`Cell`, you need call this function to refresh the internal data of Cell. 
@@ -1133,6 +1323,10 @@ def build(self, dump_input=True, parse_arg=mole.ARGPARSE, if rcut is not None: self.rcut = rcut if ke_cutoff is not None: self.ke_cutoff = ke_cutoff if low_dim_ft_type is not None: self.low_dim_ft_type = low_dim_ft_type + if use_loose_rcut is not None: + self.use_loose_rcut = use_loose_rcut + if use_particle_mesh_ewald is not None: + self.use_particle_mesh_ewald = use_particle_mesh_ewald if space_group_symmetry is not None: self.space_group_symmetry = space_group_symmetry if symmorphic is not None: @@ -1265,7 +1459,7 @@ def build(self, dump_input=True, parse_arg=mole.ARGPARSE, logger.info(self, 'Cell volume = %g', self.vol) # Check atoms coordinates if self.dimension > 0 and self.natm > 0: - scaled_atom_coords = np.linalg.solve(_a.T, self.atom_coords().T).T + scaled_atom_coords = self.get_scaled_atom_coords(_a) atom_boundary_max = scaled_atom_coords[:,:self.dimension].max(axis=0) atom_boundary_min = scaled_atom_coords[:,:self.dimension].min(axis=0) if (np.any(atom_boundary_max > 1) or np.any(atom_boundary_min < -1)): @@ -1367,13 +1561,12 @@ def lattice_vectors(self): else: return a/self.unit - def get_scaled_positions(self): - ''' Get scaled atom positions. + def get_scaled_atom_coords(self, a=None): + ''' Get scaled atomic coordinates. ''' - a = self.lattice_vectors() - atm_pos = self.atom_coords() - scaled_atm_pos = np.dot(atm_pos,np.linalg.inv(a)) - return scaled_atm_pos + if a is None: + a = self.lattice_vectors() + return np.dot(self.atom_coords(), np.linalg.inv(a)) def reciprocal_vectors(self, norm_to=2*np.pi): r''' @@ -1475,6 +1668,7 @@ def loads_(self, molstr): return self bas_rcut = bas_rcut + rcut_by_shells = rcut_by_shells get_lattice_Ls = pbctools.get_lattice_Ls @@ -1511,6 +1705,10 @@ def pbc_intor(self, intor, comp=None, hermi=0, kpts=None, kpt=None, # FIXME: Whether to check _built and call build? ._bas and .basis # may not be consistent. calling .build() may leads to wrong intor env. 
#self.build(False, False) + if self.use_loose_rcut: + return _intor_cross_screened( + intor, self, self, comp, hermi, kpts, kpt, + shls_slice, **kwargs) return intor_cross(intor, self, self, comp, hermi, kpts, kpt, shls_slice, **kwargs) @@ -1551,6 +1749,7 @@ def to_mol(self): mol = self.view(mole.Mole) delattr(mol, 'a') delattr(mol, '_mesh') + mol.enuc = None #reset nuclear energy if mol.symmetry: mol._build_symmetry() return mol diff --git a/pyscf/pbc/gto/ewald_methods.py b/pyscf/pbc/gto/ewald_methods.py new file mode 100644 index 0000000000..75d028a564 --- /dev/null +++ b/pyscf/pbc/gto/ewald_methods.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Xing Zhang +# + +import ctypes +import numpy as np +import scipy +from pyscf import __config__ +from pyscf import lib +from pyscf.lib import logger +from pyscf.gto import mole +from pyscf.pbc import tools + +libpbc = lib.load_library('libpbc') + +INTERPOLATION_ORDER = getattr(__config__, 'pyscf_pbc_ewald_bspline_order', 10) + +def _bspline(u, n=4): + fac = 1. / scipy.special.factorial(n-1) + M = 0 + for k in range(n+1): + fac1 = ((-1)**k) * scipy.special.binom(n, k) + M += fac1 * ((np.maximum(u-k, 0)) ** (n-1)) + M *= fac + return M + +def _bspline_grad(u, n=4): + r''' + ... 
math:: + \frac{dM}{du} = M_{n-1}(u) - M_{n-1}(u-1) + ''' + dMdu = _bspline(u, n-1) - _bspline(u-1, n-1) + return dMdu + +def bspline(u, ng, n=4, deriv=0): + u = np.asarray(u).ravel() + u_floor = np.floor(u) + delta = u - u_floor + idx = [] + val = [] + for i in range(n): + idx.append(np.rint((u_floor - i) % ng).astype(int)) + val.append(delta + i) + + M = np.zeros((u.size, ng)) + for i in range(n): + M[np.arange(u.size),idx[i]] += _bspline(val[i], n) + + if deriv > 0: + if deriv > 1: + raise NotImplementedError + dM = np.zeros((u.size, ng)) + for i in range(n): + dM[np.arange(u.size),idx[i]] += _bspline_grad(val[i], n) + M = [M, dM] + + m = np.arange(ng) + b = np.exp(2*np.pi*1j*(n-1)*m/ng) + tmp = 0 + for k in range(n-1): + tmp += _bspline(k+1, n) * np.exp(2*np.pi*1j*m*k/ng) + b /= tmp + if n % 2 > 0 and ng % 2 == 0 : + b[ng//2] = 0 + return M, b, idx + +def _get_ewald_direct(cell, ew_eta=None, ew_cut=None): + if ew_eta is None or ew_cut is None: + ew_eta, ew_cut = cell.get_ewald_params() + + chargs = np.asarray(cell.atom_charges(), order='C', dtype=float) + coords = np.asarray(cell.atom_coords(), order='C') + Lall = np.asarray(cell.get_lattice_Ls(rcut=ew_cut), order='C') + + natm = len(chargs) + nL = len(Lall) + ewovrl = np.zeros([1]) + fun = getattr(libpbc, "get_ewald_direct") + fun(ewovrl.ctypes.data_as(ctypes.c_void_p), + chargs.ctypes.data_as(ctypes.c_void_p), + coords.ctypes.data_as(ctypes.c_void_p), + Lall.ctypes.data_as(ctypes.c_void_p), + ctypes.c_double(ew_eta), ctypes.c_double(ew_cut), + ctypes.c_int(natm), ctypes.c_int(nL)) + return ewovrl[0] + +def _get_ewald_direct_nuc_grad(cell, ew_eta=None, ew_cut=None): + if ew_eta is None or ew_cut is None: + ew_eta, ew_cut = cell.get_ewald_params() + + chargs = np.asarray(cell.atom_charges(), order='C', dtype=float) + coords = np.asarray(cell.atom_coords(), order='C') + Lall = np.asarray(cell.get_lattice_Ls(rcut=ew_cut), order='C') + + natm = len(chargs) + nL = len(Lall) + grad = np.zeros([natm,3], order='C', 
dtype=float) + fun = getattr(libpbc, "get_ewald_direct_nuc_grad") + fun(grad.ctypes.data_as(ctypes.c_void_p), + chargs.ctypes.data_as(ctypes.c_void_p), + coords.ctypes.data_as(ctypes.c_void_p), + Lall.ctypes.data_as(ctypes.c_void_p), + ctypes.c_double(ew_eta), ctypes.c_double(ew_cut), + ctypes.c_int(natm), ctypes.c_int(nL)) + return grad + + +# FIXME The default interpolation order may be too high +def particle_mesh_ewald(cell, ew_eta=None, ew_cut=None, + order=INTERPOLATION_ORDER): + if cell.dimension != 3: + raise NotImplementedError("Particle mesh ewald only works for 3D.") + + chargs = cell.atom_charges() + coords = cell.atom_coords() + natm = len(coords) + + if ew_eta is None or ew_cut is None: + ew_eta, ew_cut = cell.get_ewald_params() + log_precision = np.log(cell.precision / (chargs.sum()*16*np.pi**2)) + ke_cutoff = -2*ew_eta**2*log_precision + mesh = cell.cutoff_to_mesh(ke_cutoff) + + ewovrl = _get_ewald_direct(cell, ew_eta, ew_cut) + ewself = -.5 * np.dot(chargs,chargs) * 2 * ew_eta / np.sqrt(np.pi) + if cell.dimension == 3: + ewself += -.5 * np.sum(chargs)**2 * np.pi/(ew_eta**2 * cell.vol) + + b = cell.reciprocal_vectors(norm_to=1) + u = np.dot(coords, b.T) * mesh[None,:] + + Mx, bx, idx = bspline(u[:,0], mesh[0], order) + My, by, idy = bspline(u[:,1], mesh[1], order) + Mz, bz, idz = bspline(u[:,2], mesh[2], order) + + idx = np.asarray(idx).T + idy = np.asarray(idy).T + idz = np.asarray(idz).T + Mx_s = Mx[np.arange(natm)[:,None], idx] + My_s = My[np.arange(natm)[:,None], idy] + Mz_s = Mz[np.arange(natm)[:,None], idz] + + #:Q = np.einsum('i,ix,iy,iz->xyz', chargs, Mx, My, Mz) + Q = np.zeros([*mesh]) + for ia in range(len(chargs)): + Q_s = np.einsum('x,y,z->xyz', Mx_s[ia], My_s[ia], Mz_s[ia]) + Q[np.ix_(idx[ia], idy[ia], idz[ia])] += chargs[ia] * Q_s + + B = np.einsum('x,y,z->xyz', bx*bx.conj(), by*by.conj(), bz*bz.conj()) + + Gv, Gvbase, weights = cell.get_Gv_weights(mesh) + absG2 = np.einsum('ix,ix->i', Gv, Gv) + absG2[absG2==0] = 1e200 + coulG = 4*np.pi 
/ absG2 + C = weights * coulG * np.exp(-absG2/(4*ew_eta**2)) + C = C.reshape(*mesh) + + Q_ifft = tools.ifft(Q, mesh).reshape(*mesh) + tmp = tools.fft(B * C * Q_ifft, mesh).real.reshape(*mesh) + ewg = 0.5 * np.prod(mesh) * np.einsum('xyz,xyz->', Q, tmp) + + logger.debug(cell, 'Ewald components = %.15g, %.15g, %.15g', ewovrl, ewself, ewg) + return ewovrl + ewself + ewg + +def particle_mesh_ewald_nuc_grad(cell, ew_eta=None, ew_cut=None, + order=INTERPOLATION_ORDER): + if cell.dimension != 3: + raise NotImplementedError("Particle mesh ewald only works for 3D.") + + chargs = cell.atom_charges() + coords = cell.atom_coords() + + if ew_eta is None or ew_cut is None: + ew_eta, ew_cut = cell.get_ewald_params() + log_precision = np.log(cell.precision / (chargs.sum()*16*np.pi**2)) + ke_cutoff = -2*ew_eta**2*log_precision + mesh = cell.cutoff_to_mesh(ke_cutoff) + + grad_dir = _get_ewald_direct_nuc_grad(cell, ew_eta, ew_cut) + + b = cell.reciprocal_vectors(norm_to=1) + u = np.dot(coords, b.T) * mesh[None,:] + + [Mx, dMx], bx, idx = bspline(u[:,0], mesh[0], order, deriv=1) + [My, dMy], by, idy = bspline(u[:,1], mesh[1], order, deriv=1) + [Mz, dMz], bz, idz = bspline(u[:,2], mesh[2], order, deriv=1) + + idx = np.asarray(idx).T + idy = np.asarray(idy).T + idz = np.asarray(idz).T + Mx_s = Mx[np.indices(idx.shape)[0], idx] + My_s = My[np.indices(idy.shape)[0], idy] + Mz_s = Mz[np.indices(idz.shape)[0], idz] + dMx_s = dMx[np.indices(idx.shape)[0], idx] + dMy_s = dMy[np.indices(idy.shape)[0], idy] + dMz_s = dMz[np.indices(idz.shape)[0], idz] + + Q = np.zeros([*mesh]) + for ia in range(len(chargs)): + Q_s = np.einsum('x,y,z->xyz', Mx_s[ia], My_s[ia], Mz_s[ia]) + Q[np.ix_(idx[ia], idy[ia], idz[ia])] += chargs[ia] * Q_s + + B = np.einsum('x,y,z->xyz', bx*bx.conj(), by*by.conj(), bz*bz.conj()) + + Gv, Gvbase, weights = cell.get_Gv_weights(mesh) + absG2 = np.einsum('ix,ix->i', Gv, Gv) + absG2[absG2==0] = 1e200 + coulG = 4*np.pi / absG2 + C = weights * coulG * np.exp(-absG2/(4*ew_eta**2)) + 
C = C.reshape(*mesh) + + Q_ifft = tools.ifft(Q, mesh).reshape(*mesh) + tmp = tools.fft(B * C * Q_ifft, mesh).real.reshape(*mesh) + + ng = np.prod(mesh) + bK = b * mesh[:,None] + grad_rec = np.zeros_like(grad_dir) + for ia in range(len(chargs)): + mask = np.ix_(idx[ia], idy[ia], idz[ia]) + dQ_s = np.einsum('x,y,z->xyz', dMx_s[ia], My_s[ia], Mz_s[ia]) + dQdr = np.einsum('x,abc->xabc', bK[0], dQ_s) + grad_rec[ia] += np.einsum('xabc,abc->x', dQdr, tmp[mask]) + + dQ_s = np.einsum('x,y,z->xyz', Mx_s[ia], dMy_s[ia], Mz_s[ia]) + dQdr = np.einsum('x,abc->xabc', bK[1], dQ_s) + grad_rec[ia] += np.einsum('xabc,abc->x', dQdr, tmp[mask]) + + dQ_s = np.einsum('x,y,z->xyz', Mx_s[ia], My_s[ia], dMz_s[ia]) + dQdr = np.einsum('x,abc->xabc', bK[2], dQ_s) + grad_rec[ia] += np.einsum('xabc,abc->x', dQdr, tmp[mask]) + + grad_rec[ia] *= chargs[ia] * ng + + # reciprocal space summation does not conserve momentum + shift = -np.sum(grad_rec, axis=0) / len(grad_rec) + logger.debug(cell, f'Shift ewald nuclear gradient by {shift} to keep momentum conservation.') + grad_rec += shift[None,:] + + grad = grad_dir + grad_rec + return grad + +def ewald_nuc_grad(cell, ew_eta=None, ew_cut=None): + chargs = np.asarray(cell.atom_charges(), order='C', dtype=float) + coords = np.asarray(cell.atom_coords(), order='C') + + if ew_eta is None or ew_cut is None: + ew_eta, ew_cut = cell.get_ewald_params() + log_precision = np.log(cell.precision / (chargs.sum()*16*np.pi**2)) + ke_cutoff = -2*ew_eta**2*log_precision + mesh = cell.cutoff_to_mesh(ke_cutoff) + + if cell.dimension == 3 and cell.use_particle_mesh_ewald: + return particle_mesh_ewald_nuc_grad(cell, ew_eta=ew_eta, ew_cut=ew_cut) + + grad_dir = _get_ewald_direct_nuc_grad(cell, ew_eta, ew_cut) + grad_rec = np.zeros_like(grad_dir, order="C") + + Gv, _, weights = cell.get_Gv_weights(mesh) + fn = getattr(libpbc, "ewald_gs_nuc_grad") + if cell.dimension != 2 or cell.low_dim_ft_type == 'inf_vacuum': + ngrids = len(Gv) + mem_avail = cell.max_memory - 
lib.current_memory()[0] + if mem_avail <= 0: + logger.warn(cell, "Not enough memory for computing ewald force.") + blksize = min(ngrids, max(mesh[2], int(mem_avail*1e6 / ((2+cell.natm*2)*8)))) + for ig0, ig1 in lib.prange(0, ngrids, blksize): + ngrid_sub = ig1 - ig0 + Gv_sub = np.asarray(Gv[ig0:ig1], order="C") + fn(grad_rec.ctypes.data_as(ctypes.c_void_p), + Gv_sub.ctypes.data_as(ctypes.c_void_p), + chargs.ctypes.data_as(ctypes.c_void_p), + coords.ctypes.data_as(ctypes.c_void_p), + ctypes.c_double(ew_eta), ctypes.c_double(weights), + ctypes.c_int(cell.natm), ctypes.c_size_t(ngrid_sub)) + else: + raise NotImplementedError + + grad = grad_dir + grad_rec + return grad diff --git a/pyscf/pbc/gto/neighborlist.py b/pyscf/pbc/gto/neighborlist.py new file mode 100644 index 0000000000..f4a0527ee2 --- /dev/null +++ b/pyscf/pbc/gto/neighborlist.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Author: Xing Zhang +# + +import ctypes +import numpy as np +from pyscf import lib +from pyscf.lib import logger + +libpbc = lib.load_library('libpbc') + +class _CNeighborPair(ctypes.Structure): + _fields_ = [("nimgs", ctypes.c_int), + ("Ls_list", ctypes.POINTER(ctypes.c_int)), + ("q_cond", ctypes.POINTER(ctypes.c_double)), + ("center", ctypes.POINTER(ctypes.c_double))] + + +class _CNeighborList(ctypes.Structure): + _fields_ = [("nish", ctypes.c_int), + ("njsh", ctypes.c_int), + ("nimgs", ctypes.c_int), + ("pairs", ctypes.POINTER(ctypes.POINTER(_CNeighborPair)))] + + +class _CNeighborListOpt(ctypes.Structure): + _fields_ = [("nl", ctypes.POINTER(_CNeighborList)), + ('fprescreen', ctypes.c_void_p)] + + +def build_neighbor_list_for_shlpairs(cell, cell1=None, Ls=None, + ish_rcut=None, jsh_rcut=None, hermi=0, + precision=None): + ''' + Build the neighbor list of shell pairs for periodic calculations. + + Arguments: + cell : :class:`pbc.gto.cell.Cell` + The :class:`Cell` instance for the bra basis functions. + cell1 : :class:`pbc.gto.cell.Cell`, optional + The :class:`Cell` instance for the ket basis functions. + If not given, both bra and ket basis functions come from cell. + Ls : (*,3) array, optional + The cartesian coordinates of the periodic images. + Default is calculated by :func:`cell.get_lattice_Ls`. + ish_rcut : (nish,) array, optional + The cutoff radii of the shells for bra basis functions. + jsh_rcut : (njsh,) array, optional + The cutoff radii of the shells for ket basis functions. + hermi : int, optional + If :math:`hermi=1`, the task list is built only for + the upper triangle of the matrix. Default is 0. + precision : float, optional + The integral precision. Default is :attr:`cell.precision`. + If both ``ish_rcut`` and ``jsh_rcut`` are given, + ``precision`` will be ignored. + + Returns: :class:`ctypes.POINTER` + The C pointer of the :class:`NeighborList` structure. 
+ ''' + if cell1 is None: + cell1 = cell + if Ls is None: + Ls = cell.get_lattice_Ls() + Ls = np.asarray(Ls, order='C', dtype=float) + nimgs = len(Ls) + + if hermi == 1 and cell1 is not cell: + logger.warn(cell, + "Set hermi=0 because cell and cell1 are not the same.") + hermi = 0 + + ish_atm = np.asarray(cell._atm, order='C', dtype=np.int32) + ish_bas = np.asarray(cell._bas, order='C', dtype=np.int32) + ish_env = np.asarray(cell._env, order='C', dtype=float) + nish = len(ish_bas) + if ish_rcut is None: + ish_rcut = cell.rcut_by_shells(precision=precision) + assert nish == len(ish_rcut) + + if cell1 is cell: + jsh_atm = ish_atm + jsh_bas = ish_bas + jsh_env = ish_env + if jsh_rcut is None: + jsh_rcut = ish_rcut + else: + jsh_atm = np.asarray(cell1._atm, order='C', dtype=np.int32) + jsh_bas = np.asarray(cell1._bas, order='C', dtype=np.int32) + jsh_env = np.asarray(cell1._env, order='C', dtype=float) + if jsh_rcut is None: + jsh_rcut = cell1.rcut_by_shells(precision=precision) + njsh = len(jsh_bas) + assert njsh == len(jsh_rcut) + + nl = ctypes.POINTER(_CNeighborList)() + func = getattr(libpbc, "build_neighbor_list", None) + try: + func(ctypes.byref(nl), + ish_atm.ctypes.data_as(ctypes.c_void_p), + ish_bas.ctypes.data_as(ctypes.c_void_p), + ish_env.ctypes.data_as(ctypes.c_void_p), + ish_rcut.ctypes.data_as(ctypes.c_void_p), + jsh_atm.ctypes.data_as(ctypes.c_void_p), + jsh_bas.ctypes.data_as(ctypes.c_void_p), + jsh_env.ctypes.data_as(ctypes.c_void_p), + jsh_rcut.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nish), ctypes.c_int(njsh), + Ls.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(nimgs), + ctypes.c_int(hermi)) + except Exception as e: + raise RuntimeError(f"Failed to build neighbor list for shell pairs.\n{e}") + return nl + +def free_neighbor_list(nl): + func = getattr(libpbc, "del_neighbor_list", None) + try: + func(ctypes.byref(nl)) + except Exception as e: + raise RuntimeError(f"Failed to free neighbor list.\n{e}") + +def neighbor_list_to_ndarray(cell, cell1, 
nl): + ''' + Returns: + Ls_list: (nLtot,) ndarray + indices of Ls + Ls_idx: (2 x nish x njsh,) ndarray + starting and ending indices in Ls_list + ''' + nish = cell.nbas + njsh = cell1.nbas + Ls_list = [] + Ls_idx = [] + nLtot = 0 + for i in range(nish): + for j in range(njsh): + pair = nl.contents.pairs[i*njsh+j] + nL = pair.contents.nimgs + nLtot += nL + for iL in range(nL): + idx = pair.contents.Ls_list[iL] + Ls_list.append(idx) + if nL > 0: + Ls_idx.extend([nLtot-nL, nLtot]) + else: + Ls_idx.extend([-1,-1]) + return np.asarray(Ls_list), np.asarray(Ls_idx) + + +class NeighborListOpt(): + def __init__(self, cell): + self.cell = cell + self.nl = None + self._this = ctypes.POINTER(_CNeighborListOpt)() + libpbc.NLOpt_init(ctypes.byref(self._this)) + + def build(self, cell=None, cell1=None, Ls=None, + ish_rcut=None, jsh_rcut=None, + hermi=0, precision=None, + set_nl=True, set_optimizer=True): + if cell is None: + cell = self.cell + + if (set_nl or set_optimizer) and self.nl is None: + self.nl = build_neighbor_list_for_shlpairs( + cell, cell1=cell1, Ls=Ls, + ish_rcut=ish_rcut, jsh_rcut=jsh_rcut, + hermi=hermi, precision=precision) + libpbc.NLOpt_set_nl(self._this, self.nl) + + if set_optimizer: + libpbc.NLOpt_set_optimizer(self._this) + + def reset(self, free_nl=True): + if self.nl is not None and free_nl: + free_neighbor_list(self.nl) + self.nl = None + libpbc.NLOpt_reset(self._this) + + def __del__(self): + self.reset() + try: + libpbc.NLOpt_del(ctypes.byref(self._this)) + except AttributeError: + pass diff --git a/pyscf/pbc/gto/pseudo/pp_int.py b/pyscf/pbc/gto/pseudo/pp_int.py index 6114fb7f86..2ff3436dbc 100644 --- a/pyscf/pbc/gto/pseudo/pp_int.py +++ b/pyscf/pbc/gto/pseudo/pp_int.py @@ -29,6 +29,17 @@ from pyscf import lib from pyscf import gto from pyscf import __config__ +from pyscf.pbc.lib.kpts_helper import gamma_point + +EPS_PPL = getattr(__config__, "pbc_gto_pseudo_eps_ppl", 1e-2) +HL_TABLE_SLOTS = 7 +ATOM_OF = 0 +ANG_OF = 1 +HL_DIM_OF = 2 +HL_DATA_OF = 3 
+HL_OFFSET0 = 4 +HF_OFFSET1 = 5 +HF_OFFSET2 = 6 libpbc = lib.load_library('libpbc') @@ -106,12 +117,293 @@ def get_gth_vlocG_part1(cell, Gv): def get_pp_loc_part2(cell, kpts=None): '''PRB, 58, 3641 Eq (1), integrals associated to C1, C2, C3, C4 ''' - from pyscf.pbc.df.aft import _IntPPBuilder - vpploc = _IntPPBuilder(cell, kpts).get_pp_loc_part2() + if kpts is None or gamma_point(kpts): + vpploc = [get_pp_loc_part2_gamma(cell)] + else: + from pyscf.pbc.df.aft import _IntPPBuilder + vpploc = _IntPPBuilder(cell, kpts).get_pp_loc_part2() if kpts is None or numpy.shape(kpts) == (3,): vpploc = vpploc[0] return vpploc + +def get_pp_loc_part2_gamma(cell): + from pyscf.pbc.df import incore + from pyscf.pbc.gto import build_neighbor_list_for_shlpairs, free_neighbor_list + + fake_cells = {} + for cn in range(1, 5): + fake_cell = fake_cell_vloc(cell, cn) + fake_cell.precision = EPS_PPL + if fake_cell.nbas > 0: + fake_cells[cn] = fake_cell + + if not fake_cells: + if any(cell.atom_symbol(ia) in cell._pseudo for ia in range(cell.natm)): + pass + else: + lib.logger.warn(cell, 'cell.pseudo was specified but its elements %s ' + 'were not found in the system.', cell._pseudo.keys()) + return 0 + + intors = ('int3c2e', 'int3c1e', 'int3c1e_r2_origk', + 'int3c1e_r4_origk', 'int3c1e_r6_origk') + kptij_lst = numpy.zeros((1,2,3)) + Ls = cell.get_lattice_Ls() + buf = None + for i, (cn, fake_cell) in enumerate(fake_cells.items()): + neighbor_list = build_neighbor_list_for_shlpairs(fake_cell, cell, Ls) + v = incore.aux_e2_sum_auxbas(cell, fake_cell, intors[cn], aosym='s2', comp=1, + kptij_lst=kptij_lst, neighbor_list=neighbor_list) + if i == 0: + buf = v + else: + buf = numpy.add(buf, v, out=buf) + v = None + free_neighbor_list(neighbor_list) + + vpploc = lib.unpack_tril(buf) + return vpploc + + +# TODO add k-point sampling +def vpploc_part2_nuc_grad(cell, dm, kpts=None): + ''' + Nuclear gradients of the 2nd part of the local part of + the GTH pseudo potential, contracted with the density 
matrix. + ''' + from pyscf.pbc.df import incore + from pyscf.pbc.gto import build_neighbor_list_for_shlpairs, free_neighbor_list + if kpts is not None and not gamma_point(kpts): + raise NotImplementedError("k-point sampling not available") + + if kpts is None: + kpts_lst = numpy.zeros((1,3)) + else: + kpts_lst = numpy.reshape(kpts, (-1,3)) + kptij_lst = numpy.hstack((kpts_lst,kpts_lst)).reshape(-1,2,3) + + intors = ('int3c2e_ip1', 'int3c1e_ip1', 'int3c1e_ip1_r2_origk', + 'int3c1e_ip1_r4_origk', 'int3c1e_ip1_r6_origk') + + Ls = cell.get_lattice_Ls() + count = 0 + grad = 0 + for cn in range(1, 5): + fakecell = fake_cell_vloc(cell, cn) + fakecell.precision = EPS_PPL + if fakecell.nbas > 0: + neighbor_list = build_neighbor_list_for_shlpairs(fakecell, cell, Ls) + buf = incore.int3c1e_nuc_grad(cell, fakecell, dm, intors[cn], + kptij_lst=kptij_lst, neighbor_list=neighbor_list) + if count == 0: + grad = buf + else: + grad = numpy.add(grad, buf, out=grad) + buf = None + count += 1 + free_neighbor_list(neighbor_list) + grad *= -2 + return grad + + +def _prepare_hl_data(fakecell, hl_blocks): + offset = [0] * 3 + hl_table = numpy.empty((len(hl_blocks),HL_TABLE_SLOTS), order='C', dtype=numpy.int32) + hl_data = [] + ptr = 0 + for ib, hl in enumerate(hl_blocks): + hl_table[ib,ATOM_OF] = fakecell._bas[ib,0] + hl_table[ib,ANG_OF] = l = fakecell.bas_angular(ib) + hl_dim = hl.shape[0] + hl_table[ib,HL_DIM_OF], hl_table[ib,HL_DATA_OF] = hl_dim, ptr + ptr += hl_dim**2 + hl_data.extend(list(hl.ravel())) + nd = 2 * l + 1 + for i in range(hl_dim): + hl_table[ib, i+HL_OFFSET0] = offset[i] + offset[i] += nd + hl_data = numpy.asarray(hl_data, order='C', dtype=numpy.double) + return hl_table, hl_data + + +# TODO add k-point sampling +def _contract_ppnl(cell, fakecell, hl_blocks, ppnl_half, comp=1, kpts=None): + from pyscf.pbc.gto import NeighborListOpt + if kpts is None: + kpts_lst = numpy.zeros((1,3)) + else: + kpts_lst = numpy.reshape(kpts, (-1,3)) + + hl_table, hl_data = 
_prepare_hl_data(fakecell, hl_blocks) + + opt = NeighborListOpt(fakecell) + opt.build(fakecell, cell) + + shls_slice = (0, cell.nbas, 0, cell.nbas) + key = 'cart' if cell.cart else 'sph' + ao_loc = gto.moleintor.make_loc(cell._bas, key) + + ppnl = [] + nao = cell.nao_nr() + nao_pair = nao * (nao+1) // 2 + for k, kpt in enumerate(kpts_lst): + ppnl_half0 = ppnl_half1 = ppnl_half2 = None + if len(ppnl_half[0]) > 0: + ppnl_half0 = ppnl_half[0][k] + if len(ppnl_half[1]) > 0: + ppnl_half1 = ppnl_half[1][k] + if len(ppnl_half[2]) > 0: + ppnl_half2 = ppnl_half[2][k] + + if gamma_point(kpt): + if ppnl_half0 is not None: + ppnl_half0 = ppnl_half0.real + if ppnl_half1 is not None: + ppnl_half1 = ppnl_half1.real + if ppnl_half2 is not None: + ppnl_half2 = ppnl_half2.real + buf = numpy.empty([nao_pair], order='C', dtype=numpy.double) + fill = getattr(libpbc, 'ppnl_fill_gs2') + else: + buf = numpy.empty([nao_pair], order='C', dtype=numpy.complex128) + raise NotImplementedError + + ppnl_half0 = numpy.asarray(ppnl_half0, order='C') + ppnl_half1 = numpy.asarray(ppnl_half1, order='C') + ppnl_half2 = numpy.asarray(ppnl_half2, order='C') + + drv = getattr(libpbc, "contract_ppnl", None) + try: + drv(fill, buf.ctypes.data_as(ctypes.c_void_p), + ppnl_half0.ctypes.data_as(ctypes.c_void_p), + ppnl_half1.ctypes.data_as(ctypes.c_void_p), + ppnl_half2.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(comp), (ctypes.c_int*4)(*shls_slice), + ao_loc.ctypes.data_as(ctypes.c_void_p), + hl_table.ctypes.data_as(ctypes.c_void_p), + hl_data.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(len(hl_blocks)), opt._this) + except Exception as e: + raise RuntimeError(f"Failed to compute non-local pseudo-potential.\n{e}") + + ppnl_k = lib.unpack_tril(buf) + ppnl.append(ppnl_k) + + if kpts is None or numpy.shape(kpts) == (3,): + ppnl = ppnl[0] + return ppnl + + +# TODO add k-point sampling +def _contract_ppnl_nuc_grad(cell, fakecell, dms, hl_blocks, ppnl_half, ppnl_half_ip2, + comp=3, kpts=None, hl_table=None, 
hl_data=None): + from pyscf.pbc.gto import NeighborListOpt + if kpts is None: + kpts_lst = numpy.zeros((1,3)) + else: + kpts_lst = numpy.reshape(kpts, (-1,3)) + + if hl_table is None: + hl_table, hl_data = _prepare_hl_data(fakecell, hl_blocks) + + opt = NeighborListOpt(fakecell) + opt.build(fakecell, cell) + + nkpts = len(kpts_lst) + nao = cell.nao + dms = dms.reshape(nkpts, nao, nao) + shls_slice = (0, cell.nbas, 0, cell.nbas) + bas = numpy.asarray(cell._bas, order='C', dtype=numpy.int32) + key = 'cart' if cell.cart else 'sph' + ao_loc = gto.moleintor.make_loc(bas, key) + + grad = [] + for k, kpt in enumerate(kpts_lst): + dm = dms[k] + naux = [0] * 3 + ppnl_half0 = ppnl_half1 = ppnl_half2 = None + if len(ppnl_half[0]) > 0: + ppnl_half0 = ppnl_half[0][k] + naux[0] = ppnl_half0.shape[0] + if len(ppnl_half[1]) > 0: + ppnl_half1 = ppnl_half[1][k] + naux[1] = ppnl_half1.shape[0] + if len(ppnl_half[2]) > 0: + ppnl_half2 = ppnl_half[2][k] + naux[2] = ppnl_half2.shape[0] + + ppnl_half_ip2_0 = ppnl_half_ip2_1 = ppnl_half_ip2_2 = None + if len(ppnl_half_ip2[0]) > 0: + ppnl_half_ip2_0 = ppnl_half_ip2[0][k] + assert naux[0] == ppnl_half_ip2_0.shape[1] + if len(ppnl_half_ip2[1]) > 0: + ppnl_half_ip2_1 = ppnl_half_ip2[1][k] + assert naux[1] == ppnl_half_ip2_1.shape[1] + if len(ppnl_half_ip2[2]) > 0: + ppnl_half_ip2_2 = ppnl_half_ip2[2][k] + assert naux[2] == ppnl_half_ip2_2.shape[1] + + naux = numpy.asarray(naux, dtype=numpy.int32) + + if gamma_point(kpt): + dm = dm.real + if ppnl_half0 is not None: + ppnl_half0 = ppnl_half0.real + ppnl_half_ip2_0 = ppnl_half_ip2_0.real + if ppnl_half1 is not None: + ppnl_half1 = ppnl_half1.real + ppnl_half_ip2_1 = ppnl_half_ip2_1.real + if ppnl_half2 is not None: + ppnl_half2 = ppnl_half2.real + ppnl_half_ip2_2 = ppnl_half_ip2_2.real + grad_k = numpy.zeros([cell.natm, comp], order='C', dtype=numpy.double) + fill = getattr(libpbc, 'ppnl_nuc_grad_fill_gs1') + else: + grad_k = numpy.empty([cell.natm, comp], order='C', dtype=numpy.complex128) + 
raise NotImplementedError + + dm = numpy.asarray(dm, order='C') + ppnl_half0 = numpy.asarray(ppnl_half0, order='C') + ppnl_half1 = numpy.asarray(ppnl_half1, order='C') + ppnl_half2 = numpy.asarray(ppnl_half2, order='C') + ppnl_half_ip2_0 = numpy.asarray(ppnl_half_ip2_0, order='C') + ppnl_half_ip2_1 = numpy.asarray(ppnl_half_ip2_1, order='C') + ppnl_half_ip2_2 = numpy.asarray(ppnl_half_ip2_2, order='C') + + drv = getattr(libpbc, "contract_ppnl_nuc_grad", None) + try: + drv(fill, + grad_k.ctypes.data_as(ctypes.c_void_p), + dm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(comp), + ppnl_half0.ctypes.data_as(ctypes.c_void_p), + ppnl_half1.ctypes.data_as(ctypes.c_void_p), + ppnl_half2.ctypes.data_as(ctypes.c_void_p), + ppnl_half_ip2_0.ctypes.data_as(ctypes.c_void_p), + ppnl_half_ip2_1.ctypes.data_as(ctypes.c_void_p), + ppnl_half_ip2_2.ctypes.data_as(ctypes.c_void_p), + hl_table.ctypes.data_as(ctypes.c_void_p), + hl_data.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(len(hl_blocks)), + naux.ctypes.data_as(ctypes.c_void_p), + (ctypes.c_int*4)(*shls_slice), + ao_loc.ctypes.data_as(ctypes.c_void_p), + bas.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(cell.natm), opt._this) + except Exception as e: + raise RuntimeError(f"Failed to compute non-local pp nuclear gradient.\n{e}") + grad.append(grad_k) + + grad_tot = 0 + if nkpts == 1: + grad_tot = grad[0] + else: + for k in range(nkpts): + grad_tot += grad[k] + grad_tot = grad_tot.real + return grad_tot + + def get_pp_nl(cell, kpts=None): if kpts is None: kpts_lst = numpy.zeros((1,3)) @@ -122,6 +414,10 @@ def get_pp_nl(cell, kpts=None): fakecell, hl_blocks = fake_cell_vnl(cell) ppnl_half = _int_vnl(cell, fakecell, hl_blocks, kpts_lst) nao = cell.nao_nr() + + if gamma_point(kpts_lst): + return _contract_ppnl(cell, fakecell, hl_blocks, ppnl_half, kpts=kpts) + buf = numpy.empty((3*9*nao), dtype=numpy.complex128) # We set this equal to zeros in case hl_blocks loop is skipped @@ -148,7 +444,32 @@ def get_pp_nl(cell, kpts=None): 
return ppnl -def fake_cell_vloc(cell, cn=0): +def vppnl_nuc_grad(cell, dm, kpts=None): + ''' + Nuclear gradients of the non-local part of the GTH pseudo potential, + contracted with the density matrix. + ''' + if kpts is None: + kpts_lst = numpy.zeros((1,3)) + else: + kpts_lst = numpy.reshape(kpts, (-1,3)) + + fakecell, hl_blocks = fake_cell_vnl(cell) + intors = ('int1e_ipovlp', 'int1e_r2_origi_ip2', 'int1e_r4_origi_ip2') + ppnl_half = _int_vnl(cell, fakecell, hl_blocks, kpts_lst) + ppnl_half_ip2 = _int_vnl(cell, fakecell, hl_blocks, kpts_lst, intors, comp=3) + # int1e_ipovlp computes ip1 so multiply -1 to get ip2 + if len(ppnl_half_ip2[0]) > 0: + for k, kpt in enumerate(kpts_lst): + ppnl_half_ip2[0][k] *= -1 + + grad = _contract_ppnl_nuc_grad(cell, fakecell, dm, hl_blocks, + ppnl_half, ppnl_half_ip2, kpts=kpts) + grad *= -2 + return grad + + +def fake_cell_vloc(cell, cn=0, atm_id=None): '''Generate fake cell for V_{loc}. Each term of V_{loc} (erf, C_1, C_2, C_3, C_4) is a gaussian type @@ -158,17 +479,23 @@ def fake_cell_vloc(cell, cn=0): The kwarg cn indiciates which term to generate for the fake cell. If cn = 0, the erf term is generated. 
C_1,..,C_4 are generated with cn = 1..4 ''' - fake_env = [cell.atom_coords().ravel()] - fake_atm = cell._atm.copy() - fake_atm[:,gto.PTR_COORD] = numpy.arange(0, cell.natm*3, 3) - ptr = cell.natm * 3 + if atm_id is None: + atm_id = numpy.arange(cell.natm) + else: + atm_id = numpy.asarray(atm_id) + natm = len(atm_id) + + fake_env = [cell.atom_coords()[atm_id].ravel()] + fake_atm = cell._atm[atm_id].copy().reshape(natm,-1) + fake_atm[:,gto.PTR_COORD] = numpy.arange(0, natm*3, 3) + ptr = natm * 3 fake_bas = [] half_sph_norm = .5/numpy.pi**.5 - for ia in range(cell.natm): - if cell.atom_charge(ia) == 0: # pass ghost atoms + for ia, atm in enumerate(atm_id): + if cell.atom_charge(atm) == 0: # pass ghost atoms continue - symb = cell.atom_symbol(ia) + symb = cell.atom_symbol(atm) if cn == 0: if symb in cell._pseudo: pp = cell._pseudo[symb] @@ -196,6 +523,7 @@ def fake_cell_vloc(cell, cn=0): fakecell._env = numpy.asarray(numpy.hstack(fake_env), dtype=numpy.double) return fakecell + # sqrt(Gamma(l+1.5)/Gamma(l+2i+1.5)) _PLI_FAC = 1/numpy.sqrt(numpy.array(( (1, 3.75 , 59.0625 ), # l = 0, @@ -249,12 +577,14 @@ def fake_cell_vnl(cell): fakecell = cell.copy(deep=False) fakecell._atm = numpy.asarray(fake_atm, dtype=numpy.int32) - fakecell._bas = numpy.asarray(fake_bas, dtype=numpy.int32) + fakecell._bas = numpy.asarray(fake_bas, dtype=numpy.int32).reshape(-1, gto.BAS_SLOTS) fakecell._env = numpy.asarray(numpy.hstack(fake_env), dtype=numpy.double) return fakecell, hl_blocks -def _int_vnl(cell, fakecell, hl_blocks, kpts): +def _int_vnl(cell, fakecell, hl_blocks, kpts, intors=None, comp=1): '''Vnuc - Vloc''' + if intors is None: + intors = ['int1e_ovlp', 'int1e_r2_origi', 'int1e_r4_origi'] rcut = max(cell.rcut, fakecell.rcut) Ls = cell.get_lattice_Ls(rcut=rcut) nimgs = len(Ls) @@ -262,6 +592,7 @@ def _int_vnl(cell, fakecell, hl_blocks, kpts): nkpts = len(kpts) fill = getattr(libpbc, 'PBCnr2c_fill_ks1') + # TODO add screening cintopt = lib.c_null_ptr() def int_ket(_bas, intor): @@ 
-279,8 +610,10 @@ def int_ket(_bas, intor): ao_loc = gto.moleintor.make_loc(bas, intor) ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]] nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]] - out = numpy.empty((nkpts,ni,nj), dtype=numpy.complex128) - comp = 1 + if comp == 1: + out = numpy.empty((nkpts,ni,nj), dtype=numpy.complex128) + else: + out = numpy.empty((nkpts,comp,ni,nj), dtype=numpy.complex128) fintor = getattr(gto.moleintor.libcgto, intor) @@ -297,7 +630,7 @@ def int_ket(_bas, intor): return out hl_dims = numpy.asarray([len(hl) for hl in hl_blocks]) - out = (int_ket(fakecell._bas[hl_dims>0], 'int1e_ovlp'), - int_ket(fakecell._bas[hl_dims>1], 'int1e_r2_origi'), - int_ket(fakecell._bas[hl_dims>2], 'int1e_r4_origi')) + out = (int_ket(fakecell._bas[hl_dims>0], intors[0]), + int_ket(fakecell._bas[hl_dims>1], intors[1]), + int_ket(fakecell._bas[hl_dims>2], intors[2])) return out diff --git a/pyscf/pbc/gto/pseudo/test/test_pp.py b/pyscf/pbc/gto/pseudo/test/test_pp.py index c00057a064..95b343bbf6 100644 --- a/pyscf/pbc/gto/pseudo/test/test_pp.py +++ b/pyscf/pbc/gto/pseudo/test/test_pp.py @@ -22,6 +22,7 @@ from pyscf.pbc.dft import numint from pyscf.pbc.gto import pseudo from pyscf.pbc.gto.pseudo import pp_int +from pyscf.data.nist import BOHR def get_pp_loc_part2(cell, kpt=np.zeros(3)): @@ -244,7 +245,42 @@ def test_pp(self): v1 = pseudo.get_pp(cell, k) self.assertAlmostEqual(abs(v0-v1).max(), 0, 6) + def test_pp_nuc_grad(self): + cell = pbcgto.Cell() + cell.atom = 'H 0 0 0; Na 0 0 0.8' + cell.a = np.diag([6,6,6]) + cell.basis='gth-szv' + cell.pseudo='gth-pade' + cell.ke_cutoff=200 + cell.build() + + cellp = cell.copy() + cellp.atom = 'H 0 0 0; Na 0 0 0.8001' + cellp.build() + cellm = cell.copy() + cellm.atom = 'H 0 0 0; Na 0 0 0.7999' + cellm.build() + + np.random.seed(1) + dm = np.random.rand(cell.nao, cell.nao) + dm = (dm + dm.T) / 2 + + # local_part2 + vp = pp_int.get_pp_loc_part2(cellp) + vm = pp_int.get_pp_loc_part2(cellm) + v_fd = (vp - vm) / (0.0002 / 
BOHR) + grad = pp_int.vpploc_part2_nuc_grad(cell, dm)[1,2] + grad_fd = np.einsum("ij,ij->", v_fd, dm) + self.assertAlmostEqual(abs(grad - grad_fd), 0, 7) + + # non-local + vp = pp_int.get_pp_nl(cellp) + vm = pp_int.get_pp_nl(cellm) + v_fd = (vp - vm) / (0.0002 / BOHR) + grad = pp_int.vppnl_nuc_grad(cell, dm)[1,2] + grad_fd = np.einsum("ij,ij->", v_fd, dm) + self.assertAlmostEqual(abs(grad - grad_fd), 0, 7) if __name__ == '__main__': print("Full Tests for pbc.gto.pseudo") diff --git a/pyscf/pbc/gto/test/test_cell.py b/pyscf/pbc/gto/test/test_cell.py index 5dee058140..bd7a0e067f 100644 --- a/pyscf/pbc/gto/test/test_cell.py +++ b/pyscf/pbc/gto/test/test_cell.py @@ -25,6 +25,7 @@ from pyscf.pbc import gto as pgto from pyscf.pbc.gto import ecp from pyscf.pbc.tools import pbc as pbctools +from pyscf.pbc.gto import ewald_methods def setUpModule(): @@ -252,6 +253,30 @@ def test_ewald_2d(self): # eref = cell.to_mol().energy_nuc() # self.assertAlmostEqual(cell.ewald(), eref, 2) + def test_particle_mesh_ewald(self): + cell = pgto.Cell() + cell.a = np.diag([10.,]*3) + cell.atom = ''' + O 5.84560 5.21649 5.10372 + H 6.30941 5.30070 5.92953 + H 4.91429 5.26674 5.28886 + ''' + cell.pseudo = 'gth-pade' + cell.verbose = 0 + cell.build() + + cell1 = cell.copy() + cell1.use_particle_mesh_ewald = True + cell1.build() + + e0 = cell.ewald() + e1 = cell1.ewald() + self.assertAlmostEqual(e0, e1, 6) + + g0 = ewald_methods.ewald_nuc_grad(cell) + g1 = ewald_methods.ewald_nuc_grad(cell1) + self.assertAlmostEqual(abs(g1-g0).max(), 0, 6) + def test_pbc_intor(self): numpy.random.seed(12) kpts = numpy.random.random((4,3)) diff --git a/pyscf/pbc/scf/hf.py b/pyscf/pbc/scf/hf.py index 8225d778b6..f6c91336ed 100644 --- a/pyscf/pbc/scf/hf.py +++ b/pyscf/pbc/scf/hf.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2019 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -53,23 +53,24 @@ def get_ovlp(cell, kpt=np.zeros(3)): # Avoid pbcopt's prescreening in the lattice sum, for better accuracy s = cell.pbc_intor('int1e_ovlp', hermi=0, kpts=kpt, pbcopt=lib.c_null_ptr()) - s = lib.asarray(s) + s = np.asarray(s) hermi_error = abs(s - np.rollaxis(s.conj(), -1, -2)).max() if hermi_error > cell.precision and hermi_error > 1e-12: logger.warn(cell, '%.4g error found in overlap integrals. ' 'cell.precision or cell.rcut can be adjusted to ' 'improve accuracy.', hermi_error) - cond = np.max(lib.cond(s)) - if cond * precision > 1e2: - prec = 1e7 / cond - rmin = gto.estimate_rcut(cell, prec*1e-5) - logger.warn(cell, 'Singularity detected in overlap matrix. ' - 'Integral accuracy may be not enough.\n ' - 'You can adjust cell.precision or cell.rcut to ' - 'improve accuracy. Recommended settings are\n ' - 'cell.precision < %.2g\n ' - 'cell.rcut > %.4g', prec, rmin) + if cell.verbose >= logger.DEBUG: + cond = np.max(lib.cond(s)) + if cond * precision > 1e2: + prec = 1e7 / cond + rmin = gto.estimate_rcut(cell, prec*1e-5) + logger.warn(cell, 'Singularity detected in overlap matrix. ' + 'Integral accuracy may be not enough.\n ' + 'You can adjust cell.precision or cell.rcut to ' + 'improve accuracy. 
Recommended settings are\n ' + 'cell.precision < %.2g\n ' + 'cell.rcut > %.4g', prec, rmin) return s @@ -615,11 +616,18 @@ def dump_flags(self, verbose=None): return self def check_sanity(self): - mol_hf.SCF.check_sanity(self) + lib.StreamObject.check_sanity(self) if (isinstance(self.exxdiv, str) and self.exxdiv.lower() != 'ewald' and isinstance(self.with_df, df.df.DF)): logger.warn(self, 'exxdiv %s is not supported in DF or MDF', self.exxdiv) + + if self.verbose >= logger.DEBUG: + s = self.get_ovlp() + cond = np.max(lib.cond(s)) + if cond * 1e-17 > self.conv_tol: + logger.warn(self, 'Singularity detected in overlap matrix (condition number = %4.3g). ' + 'SCF may be inaccurate and hard to converge.', cond) return self def get_hcore(self, cell=None, kpt=None): @@ -738,7 +746,7 @@ def get_jk_incore(self, cell=None, dm=None, hermi=1, kpt=None, omega=None, return self.get_jk(cell, dm, hermi, kpt) def energy_nuc(self): - return self.cell.energy_nuc() + return self.cell.enuc @lib.with_doc(dip_moment.__doc__) def dip_moment(self, cell=None, dm=None, unit='Debye', verbose=logger.NOTE, @@ -758,10 +766,10 @@ def _finalize(self): makov_payne_correction(self) return self - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): if cell is None: cell = self.cell dm = mol_hf.SCF.get_init_guess(self, cell, key) - dm = normalize_dm_(self, dm) + dm = normalize_dm_(self, dm, s1e) return dm def init_guess_by_1e(self, cell=None): @@ -914,12 +922,14 @@ def _format_jks(vj, dm, kpts_band): vj = vj[0] return vj -def normalize_dm_(mf, dm): +def normalize_dm_(mf, dm, s1e=None): ''' Scale density matrix to make it produce the correct number of electrons. 
''' cell = mf.cell - ne = np.einsum('ij,ji->', dm, mf.get_ovlp(cell)).real + if s1e is None: + s1e = mf.get_ovlp(cell) + ne = lib.einsum('ij,ji->', dm, s1e).real if abs(ne - cell.nelectron) > 0.01: logger.debug(mf, 'Big error detected in the electron number ' 'of initial guess density matrix (Ne/cell = %g)!\n' diff --git a/pyscf/pbc/scf/khf.py b/pyscf/pbc/scf/khf.py index 1ef2d88908..89124e8af4 100644 --- a/pyscf/pbc/scf/khf.py +++ b/pyscf/pbc/scf/khf.py @@ -496,7 +496,7 @@ def dump_flags(self, verbose=None): self.with_df.dump_flags(verbose) return self - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): raise NotImplementedError def init_guess_by_1e(self, cell=None): @@ -524,10 +524,10 @@ def get_jk(self, cell=None, dm_kpts=None, hermi=1, kpts=None, kpts_band=None, cpu0 = (logger.process_clock(), logger.perf_counter()) if self.rsjk: vj, vk = self.rsjk.get_jk(dm_kpts, hermi, kpts, kpts_band, - with_j, with_k, omega, self.exxdiv) + with_j, with_k, omega=omega, exxdiv=self.exxdiv) else: vj, vk = self.with_df.get_jk(dm_kpts, hermi, kpts, kpts_band, - with_j, with_k, omega, self.exxdiv) + with_j, with_k, omega=omega, exxdiv=self.exxdiv) logger.timer(self, 'vj and vk', *cpu0) return vj, vk @@ -700,7 +700,9 @@ def check_sanity(self): 'found in KRHF method.', cell.nelec, nkpts) return KSCF.check_sanity(self) - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): + if s1e is None: + s1e = self.get_ovlp(cell) dm = mol_hf.SCF.get_init_guess(self, cell, key) nkpts = len(self.kpts) if dm.ndim == 2: @@ -708,7 +710,7 @@ def get_init_guess(self, cell=None, key='minao'): dm = np.repeat(dm[None,:,:], nkpts, axis=0) dm_kpts = dm - ne = np.einsum('kij,kji->', dm_kpts, self.get_ovlp(cell)).real + ne = lib.einsum('kij,kji->', dm_kpts, s1e).real # FIXME: consider the fractional num_electron or not? This maybe # relate to the charged system. 
nelectron = float(self.cell.tot_electrons(nkpts)) diff --git a/pyscf/pbc/scf/khf_ksymm.py b/pyscf/pbc/scf/khf_ksymm.py index baaf5543a6..69e4d5c5d1 100644 --- a/pyscf/pbc/scf/khf_ksymm.py +++ b/pyscf/pbc/scf/khf_ksymm.py @@ -343,14 +343,16 @@ class KsymAdaptedKRHF(KsymAdaptedKSCF, khf.KRHF): to_ks = khf.KRHF.to_ks convert_from_ = khf.KRHF.convert_from_ - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): + if s1e is None: + s1e = self.get_ovlp(cell) dm_kpts = mol_hf.SCF.get_init_guess(self, cell, key) if dm_kpts.ndim == 2: dm_kpts = np.asarray([dm_kpts]*self.kpts.nkpts_ibz) elif len(dm_kpts) != self.kpts.nkpts_ibz: dm_kpts = dm_kpts[self.kpts.ibz2bz] - ne = np.einsum('k,kij,kji', self.kpts.weights_ibz, dm_kpts, self.get_ovlp(cell)).real + ne = lib.einsum('k,kij,kji', self.kpts.weights_ibz, dm_kpts, s1e).real nkpts = self.kpts.nkpts ne *= nkpts nelectron = float(self.cell.tot_electrons(nkpts)) diff --git a/pyscf/pbc/scf/kuhf.py b/pyscf/pbc/scf/kuhf.py index af56a2ced3..eae04c0713 100644 --- a/pyscf/pbc/scf/kuhf.py +++ b/pyscf/pbc/scf/kuhf.py @@ -416,7 +416,9 @@ def dump_flags(self, verbose=None): 'alpha = %d beta = %d', *self.nelec) return self - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): + if s1e is None: + s1e = self.get_ovlp(cell) dm_kpts = mol_hf.SCF.get_init_guess(self, cell, key) assert dm_kpts.shape[0] == 2 nkpts = len(self.kpts) @@ -424,7 +426,7 @@ def get_init_guess(self, cell=None, key='minao'): # dm[spin,nao,nao] at gamma point -> dm_kpts[spin,nkpts,nao,nao] dm_kpts = np.repeat(dm_kpts[:,None,:,:], nkpts, axis=1) - ne = np.einsum('xkij,kji->x', dm_kpts, self.get_ovlp(cell)).real + ne = lib.einsum('xkij,kji->x', dm_kpts, s1e).real nelec = np.asarray(self.nelec) if np.any(abs(ne - nelec) > 0.01*nkpts): logger.debug(self, 'Big error detected in the electron number ' diff --git a/pyscf/pbc/scf/kuhf_ksymm.py 
b/pyscf/pbc/scf/kuhf_ksymm.py index 310de63289..4e10ed0fdc 100644 --- a/pyscf/pbc/scf/kuhf_ksymm.py +++ b/pyscf/pbc/scf/kuhf_ksymm.py @@ -155,7 +155,9 @@ def dump_flags(self, verbose=None): 'alpha = %d beta = %d', *self.nelec) return self - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): + if s1e is None: + s1e = self.get_ovlp(cell) dm_kpts = mol_hf.SCF.get_init_guess(self, cell, key) assert dm_kpts.shape[0]==2 if dm_kpts.ndim != 4: @@ -165,7 +167,7 @@ def get_init_guess(self, cell=None, key='minao'): elif dm_kpts.shape[1] != self.kpts.nkpts_ibz: dm_kpts = dm_kpts[:,self.kpts.ibz2bz] - ne = np.einsum('k,xkij,kji->x', self.kpts.weights_ibz, dm_kpts, self.get_ovlp(cell)).real + ne = lib.einsum('k,xkij,kji->x', self.kpts.weights_ibz, dm_kpts, s1e).real nkpts = self.kpts.nkpts ne *= nkpts nelec = np.asarray(self.nelec) diff --git a/pyscf/pbc/scf/test/test_hf.py b/pyscf/pbc/scf/test/test_hf.py index fe3387468b..3e47561cee 100644 --- a/pyscf/pbc/scf/test/test_hf.py +++ b/pyscf/pbc/scf/test/test_hf.py @@ -20,6 +20,7 @@ import tempfile import numpy from pyscf import lib +from pyscf.scf import atom_hf from pyscf.pbc import gto as pbcgto from pyscf.pbc.scf import hf as pbchf import pyscf.pbc.scf as pscf @@ -511,7 +512,7 @@ def test_init_guess_by_1e(self): self.assertEqual(dm.ndim, 3) self.assertAlmostEqual(lib.fp(dm), 0.025922864381755062, 6) - def test_init_guess_by_atom(self): + def test_init_guess_by_minao(self): with lib.temporary_env(cell, dimension=1): dm = mf.get_init_guess(key='minao') kdm = kmf.get_init_guess(key='minao') @@ -521,6 +522,29 @@ def test_init_guess_by_atom(self): self.assertEqual(kdm.ndim, 3) self.assertAlmostEqual(lib.fp(kdm), -1.714952331211208, 8) + def test_init_guess_by_atom(self): + with lib.temporary_env(cell, dimension=1): + dm = mf.get_init_guess(key='atom') + kdm = kmf.get_init_guess(key='atom') + + self.assertAlmostEqual(lib.fp(dm), 0.18074522075843902, 7) + + 
self.assertEqual(kdm.ndim, 3) + self.assertAlmostEqual(lib.fp(dm), 0.18074522075843902, 7) + + def test_atom_hf_with_pp(self): + mol = pbcgto.Cell() + mol.build( + verbose = 7, + output = '/dev/null', + atom = 'O 0 0 0; H 0 0 -1; H 0 0 1', + a = [[5, 0, 0], [0, 5, 0], [0, 0, 5]], + basis = 'gth-dzvp', + pseudo = 'gth-pade') + scf_result = atom_hf.get_atm_nrhf(mol) + self.assertAlmostEqual(scf_result['O'][0], -15.193243796069835, 9) + self.assertAlmostEqual(scf_result['H'][0], -0.49777509423571864, 9) + def test_jk(self): nao = cell.nao numpy.random.seed(2) diff --git a/pyscf/pbc/scf/uhf.py b/pyscf/pbc/scf/uhf.py index b9d9b1407d..0d247f745e 100644 --- a/pyscf/pbc/scf/uhf.py +++ b/pyscf/pbc/scf/uhf.py @@ -221,10 +221,13 @@ def dip_moment(self, cell=None, dm=None, unit='Debye', verbose=logger.NOTE, rho = self.get_rho(dm) return dip_moment(cell, dm, unit, verbose, rho=rho, kpt=self.kpt, **kwargs) - def get_init_guess(self, cell=None, key='minao'): - if cell is None: cell = self.cell + def get_init_guess(self, cell=None, key='minao', s1e=None): + if cell is None: + cell = self.cell + if s1e is None: + s1e = self.get_ovlp(cell) dm = mol_uhf.UHF.get_init_guess(self, cell, key) - ne = np.einsum('xij,ji->x', dm, self.get_ovlp(cell)).real + ne = np.einsum('xij,ji->x', dm, s1e).real nelec = self.nelec if np.any(abs(ne - nelec) > 0.01): logger.debug(self, 'Big error detected in the electron number ' diff --git a/pyscf/pbc/symm/geom.py b/pyscf/pbc/symm/geom.py index 74119a4483..ae698d2347 100644 --- a/pyscf/pbc/symm/geom.py +++ b/pyscf/pbc/symm/geom.py @@ -77,7 +77,7 @@ def search_space_group_ops(cell, rotations=None, tol=SYMPREC): ''' if rotations is None: rotations = search_point_group_ops(cell, tol=tol) a = cell.lattice_vectors() - coords = cell.get_scaled_positions() + coords = cell.get_scaled_atom_coords() atmgrp = mole.atom_types(cell._atom, magmom=cell.magmom) atmgrp_spin_inv = {} #spin up and down inverted has_spin = False diff --git a/pyscf/pbc/symm/pyscf_spglib.py 
b/pyscf/pbc/symm/pyscf_spglib.py index 3a0d1442cb..f87117a8dd 100644 --- a/pyscf/pbc/symm/pyscf_spglib.py +++ b/pyscf/pbc/symm/pyscf_spglib.py @@ -29,7 +29,7 @@ def cell_to_spgcell(cell): Convert PySCF Cell object to spglib cell object ''' a = cell.lattice_vectors() - atm_pos = cell.get_scaled_positions() + atm_pos = cell.get_scaled_atom_coords() atm_num = [] from pyscf.data import elements for symbol in cell.elements: diff --git a/pyscf/pbc/symm/symmetry.py b/pyscf/pbc/symm/symmetry.py index c79bc81167..ce29e3afac 100644 --- a/pyscf/pbc/symm/symmetry.py +++ b/pyscf/pbc/symm/symmetry.py @@ -219,7 +219,7 @@ def dump_info(self): def _get_phase(cell, op, kpt_scaled, ignore_phase=False, tol=SYMPREC): kpt_scaled = op.a2b(cell).dot_rot(kpt_scaled) - coords_scaled = cell.get_scaled_positions().reshape(-1,3) + coords_scaled = cell.get_scaled_atom_coords().reshape(-1,3) natm = coords_scaled.shape[0] phase = np.ones((natm,), dtype=np.complex128) atm_map = np.arange(natm) diff --git a/pyscf/pbc/tools/pbc.py b/pyscf/pbc/tools/pbc.py index 7ca867fd21..20d45fe692 100644 --- a/pyscf/pbc/tools/pbc.py +++ b/pyscf/pbc/tools/pbc.py @@ -14,6 +14,7 @@ # limitations under the License. 
import warnings +import ctypes import numpy as np import scipy.linalg from pyscf import lib @@ -57,6 +58,44 @@ def _ifftn_blas(g, mesh): return out.reshape(-1, *mesh) if FFT_ENGINE == 'FFTW': + try: + libfft = lib.load_library('libfft') + except OSError: + raise RuntimeError("Failed to load libfft") + + def _copy_d2z(a): + fn = libfft._copy_d2z + out = np.empty(a.shape, dtype=np.complex128) + fn(out.ctypes.data_as(ctypes.c_void_p), + a.ctypes.data_as(ctypes.c_void_p), + ctypes.c_size_t(a.size)) + return out + + def _complex_fftn_fftw(f, mesh, func): + if f.dtype == np.double and f.flags.c_contiguous: + # np.asarray or np.astype is too slow + f = _copy_d2z(f) + else: + f = np.asarray(f, order='C', dtype=np.complex128) + mesh = np.asarray(mesh, order='C', dtype=np.int32) + rank = len(mesh) + out = np.empty_like(f) + fn = getattr(libfft, func) + for i, fi in enumerate(f): + fn(fi.ctypes.data_as(ctypes.c_void_p), + out[i].ctypes.data_as(ctypes.c_void_p), + mesh.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(rank)) + return out + + def _fftn_wrapper(a): + mesh = a.shape[1:] + return _complex_fftn_fftw(a, mesh, 'fft') + def _ifftn_wrapper(a): + mesh = a.shape[1:] + return _complex_fftn_fftw(a, mesh, 'ifft') + +elif FFT_ENGINE == 'PYFFTW': # pyfftw is slower than np.fft in most cases try: import pyfftw @@ -235,8 +274,9 @@ def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None, else: kG = Gv - equal2boundary = np.zeros(Gv.shape[0], dtype=bool) + equal2boundary = None if wrap_around and abs(k).sum() > 1e-9: + equal2boundary = np.zeros(Gv.shape[0], dtype=bool) # Here we 'wrap around' the high frequency k+G vectors into their lower # frequency counterparts. 
Important if you want the gamma point and k-point # answers to agree @@ -357,7 +397,8 @@ def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None, if cell.dimension > 0 and exxdiv == 'ewald' and len(G0_idx) > 0: coulG[G0_idx] += Nk*cell.vol*madelung(cell, kpts) - coulG[equal2boundary] = 0 + if equal2boundary is not None: + coulG[equal2boundary] = 0 # Scale the coulG kernel for attenuated Coulomb integrals. # * omega is used by RangeSeparatedJKBuilder which requires ewald probe charge @@ -507,7 +548,7 @@ def get_lattice_Ls(cell, nimgs=None, rcut=None, dimension=None, discard=True): a = cell.lattice_vectors() - scaled_atom_coords = np.linalg.solve(a.T, cell.atom_coords().T).T + scaled_atom_coords = cell.get_scaled_atom_coords() atom_boundary_max = scaled_atom_coords[:,:dimension].max(axis=0) atom_boundary_min = scaled_atom_coords[:,:dimension].min(axis=0) if (np.any(atom_boundary_max > 1) or np.any(atom_boundary_min < -1)): @@ -542,11 +583,12 @@ def find_boundary(a): np.arange(-bounds[2], bounds[2]+1))) Ls = np.dot(Ts[:,:dimension], a[:dimension]) - ovlp_penalty += 1e-200 # avoid /0 - Ts_scaled = (Ts[:,:dimension] + 1e-200) / ovlp_penalty - ovlp_penalty_fac = 1. / abs(Ts_scaled).min(axis=1) - Ls_mask = np.linalg.norm(Ls, axis=1) * (1-ovlp_penalty_fac) < rcut - Ls = Ls[Ls_mask] + if discard: + ovlp_penalty += 1e-200 # avoid /0 + Ts_scaled = (Ts[:,:dimension] + 1e-200) / ovlp_penalty + ovlp_penalty_fac = 1. 
/ abs(Ts_scaled).min(axis=1) + Ls_mask = np.linalg.norm(Ls, axis=1) * (1-ovlp_penalty_fac) < rcut + Ls = Ls[Ls_mask] return np.asarray(Ls, order='C') diff --git a/pyscf/scf/atom_hf.py b/pyscf/scf/atom_hf.py index 58e0a585c3..4430963493 100644 --- a/pyscf/scf/atom_hf.py +++ b/pyscf/scf/atom_hf.py @@ -30,6 +30,7 @@ def get_atm_nrhf(mol, atomic_configuration=elements.NRSRHF_CONFIGURATION): atm_template = mol.copy(deep=False) atm_template.charge = 0 + atm_template.enuc = 0 atm_template.symmetry = False # TODO: enable SO3 symmetry here atm_template.atom = atm_template._atom = [] atm_template.cart = False # AtomSphAverageRHF does not support cartesian basis @@ -50,7 +51,6 @@ def get_atm_nrhf(mol, atomic_configuration=elements.NRSRHF_CONFIGURATION): atm._ecpbas[:,0] = 0 if element in mol._pseudo: atm._pseudo = {element: mol._pseudo.get(element)} - raise NotImplementedError atm.spin = atm.nelectron % 2 nao = atm.nao @@ -59,6 +59,19 @@ def get_atm_nrhf(mol, atomic_configuration=elements.NRSRHF_CONFIGURATION): mo_occ = mo_energy = numpy.zeros(nao) mo_coeff = numpy.zeros((nao,nao)) atm_scf_result[element] = (0, mo_energy, mo_coeff, mo_occ) + elif atm._pseudo: + from pyscf.scf import atom_hf_pp + atm.a = None + if atm.nelectron == 1: + atm_hf = atom_hf_pp.AtomHF1ePP(atm) + else: + atm_hf = atom_hf_pp.AtomSCFPP(atm) + atm_hf.atomic_configuration = atomic_configuration + + atm_hf.verbose = mol.verbose + atm_hf.run() + atm_scf_result[element] = (atm_hf.e_tot, atm_hf.mo_energy, + atm_hf.mo_coeff, atm_hf.mo_occ) else: if atm.nelectron == 1: atm_hf = AtomHF1e(atm) diff --git a/pyscf/scf/atom_hf_pp.py b/pyscf/scf/atom_hf_pp.py new file mode 100644 index 0000000000..19a2f73930 --- /dev/null +++ b/pyscf/scf/atom_hf_pp.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Xing Zhang +# + +import copy +import numpy +from scipy.special import erf + +from pyscf import lib +from pyscf import gto, scf +from pyscf.dft import gen_grid, numint +from pyscf.pbc import gto as pbcgto +from pyscf.scf import atom_hf, rohf + +def get_pp_loc_part1_rs(mol, coords): + atm_coords = mol.atom_coords() + out = 0 + for ia in range(mol.natm): + r0 = atm_coords[ia] + r2 = numpy.sum((coords - r0)**2, axis=1) + r = numpy.sqrt(r2) + Zia = mol.atom_charge(ia) + symb = mol.atom_symbol(ia) + if symb in mol._pseudo: + pp = mol._pseudo[symb] + rloc, nexp, cexp = pp[1:3+1] + else: + rloc = 1e16 + alpha = 1.0 / (numpy.sqrt(2) * rloc) + out += - Zia / r * erf(alpha * r) + return out + +def _aux_e2(cell, auxcell, intor, aosym='s1', comp=1): + intor = cell._add_suffix(intor) + pcell = copy.copy(cell) + pcell._atm, pcell._bas, pcell._env = \ + atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env, + cell._atm, cell._bas, cell._env) + ao_loc = gto.moleintor.make_loc(bas, intor) + aux_loc = auxcell.ao_loc_nr(auxcell.cart or 'ssc' in intor) + ao_loc = numpy.asarray(numpy.hstack([ao_loc, ao_loc[-1]+aux_loc[1:]]), + dtype=numpy.int32) + atm, bas, env = gto.conc_env(atm, bas, env, + auxcell._atm, auxcell._bas, auxcell._env) + nbas = cell.nbas + shls_slice = (0, nbas, nbas, nbas*2, nbas*2, nbas*2+auxcell.nbas) + comp = 1 + out = gto.moleintor.getints3c(intor, atm, bas, env, shls_slice=shls_slice, + comp=comp, aosym=aosym, ao_loc=ao_loc) + return out + +def get_pp_loc_part2(mol): + buf = 0 + intors = ('int3c2e', 'int3c1e', 'int3c1e_r2_origk', + 
'int3c1e_r4_origk', 'int3c1e_r6_origk') + for cn in range(1, 5): + fakecell = pbcgto.pseudo.pp_int.fake_cell_vloc(mol, cn) + if fakecell.nbas > 0: + v = _aux_e2(mol, fakecell, intors[cn], aosym='s2', comp=1) + buf += numpy.einsum('...i->...', v) + if numpy.isscalar(buf): + vpp_loc = buf + else: + vpp_loc = lib.unpack_tril(buf) + return vpp_loc + +def get_pp_loc(mol): + # TODO use analytic integral + grids = gen_grid.Grids(mol) + grids.level = 3 + grids.build(with_non0tab=True) + _numint = numint.NumInt() + + vpp = 0 + for ao, mask, weight, coords in _numint.block_loop(mol, grids): + vloc = get_pp_loc_part1_rs(mol, coords) + vpp += numpy.einsum("g,g,gi,gj->ij", weight, vloc, ao, ao) + vpp += get_pp_loc_part2(mol) + return vpp + +def get_pp_nl(mol): + nao = mol.nao + fakecell, hl_blocks = pbcgto.pseudo.pp_int.fake_cell_vnl(mol) + ppnl_half = _int_vnl(mol, fakecell, hl_blocks) + + ppnl = numpy.zeros((nao,nao), dtype=numpy.double) + offset = [0] * 3 + for ib, hl in enumerate(hl_blocks): + l = fakecell.bas_angular(ib) + nd = 2 * l + 1 + hl_dim = hl.shape[0] + ilp = numpy.ndarray((hl_dim,nd,nao), dtype=numpy.double) + for i in range(hl_dim): + p0 = offset[i] + ilp[i] = ppnl_half[i][p0:p0+nd] + offset[i] = p0 + nd + ppnl += numpy.einsum('ilp,ij,jlq->pq', ilp, hl, ilp) + return ppnl + +def _int_vnl(cell, fakecell, hl_blocks): + intopt = lib.c_null_ptr() + + def int_ket(_bas, intor): + if len(_bas) == 0: + return [] + intor = cell._add_suffix(intor) + atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env, + fakecell._atm, _bas, fakecell._env) + atm = numpy.asarray(atm, dtype=numpy.int32) + bas = numpy.asarray(bas, dtype=numpy.int32) + env = numpy.asarray(env, dtype=numpy.double) + nbas = len(bas) + shls_slice = (cell.nbas, nbas, 0, cell.nbas) + ao_loc = gto.moleintor.make_loc(bas, intor) + ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]] + nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]] + out = numpy.empty((ni,nj), dtype=numpy.double) + comp = 1 + out = 
gto.moleintor.getints2c(intor, atm, bas, env, shls_slice=shls_slice, comp=comp, hermi=0, + ao_loc=ao_loc, cintopt=intopt, out=out) + return out + + hl_dims = numpy.asarray([len(hl) for hl in hl_blocks]) + out = (int_ket(fakecell._bas[hl_dims>0], 'int1e_ovlp'), + int_ket(fakecell._bas[hl_dims>1], 'int1e_r2_origi'), + int_ket(fakecell._bas[hl_dims>2], 'int1e_r4_origi')) + return out + +class AtomSCFPP(atom_hf.AtomSphAverageRHF): + def get_hcore(self, mol=None): + if mol is None: + mol = self.mol + h = mol.intor('int1e_kin', hermi=1) + h += get_pp_nl(mol) + h += get_pp_loc(mol) + return h + +class AtomHF1ePP(rohf.HF1e, AtomSCFPP): + eig = AtomSCFPP.eig + get_hcore = AtomSCFPP.get_hcore diff --git a/pyscf/scf/dhf.py b/pyscf/scf/dhf.py index 32d2d0f7f2..6e29d5a450 100644 --- a/pyscf/scf/dhf.py +++ b/pyscf/scf/dhf.py @@ -285,14 +285,14 @@ def fproj(mo): return dm -def get_init_guess(mol, key='minao'): +def get_init_guess(mol, key='minao', **kwargs): '''Generate density matrix for initial guess Kwargs: key : str One of 'minao', 'atom', 'huckel', 'mod_huckel', 'hcore', '1e', 'chkfile'. 
''' - return UHF(mol).get_init_guess(mol, key) + return UHF(mol).get_init_guess(mol, key, **kwargs) def time_reversal_matrix(mol, mat): ''' T(A_ij) = A[T(i),T(j)]^* diff --git a/pyscf/scf/diis.py b/pyscf/scf/diis.py index 321f81cdfe..a442f58b9c 100644 --- a/pyscf/scf/diis.py +++ b/pyscf/scf/diis.py @@ -72,13 +72,13 @@ def get_num_vec(self): def get_err_vec_orig(s, d, f): '''error vector = SDF - FDS''' if isinstance(f, numpy.ndarray) and f.ndim == 2: - sdf = reduce(numpy.dot, (s,d,f)) + sdf = reduce(lib.dot, (s,d,f)) errvec = (sdf.conj().T - sdf).ravel() elif isinstance(f, numpy.ndarray) and f.ndim == 3 and s.ndim == 3: errvec = [] for i in range(f.shape[0]): - sdf = reduce(numpy.dot, (s[i], d[i], f[i])) + sdf = reduce(lib.dot, (s[i], d[i], f[i])) errvec.append((sdf.conj().T - sdf).ravel()) errvec = numpy.hstack(errvec) @@ -98,7 +98,7 @@ def get_err_vec_orth(s, d, f, Corth): sym_forbid = orbsym[:,None] != orbsym if isinstance(f, numpy.ndarray) and f.ndim == 2: - sdf = reduce(numpy.dot, (Corth.conj().T, s, d, f, Corth)) + sdf = reduce(lib.dot, (Corth.conj().T, s, d, f, Corth)) if orbsym is not None: sdf[sym_forbid] = 0 errvec = (sdf.conj().T - sdf).ravel() @@ -106,7 +106,7 @@ def get_err_vec_orth(s, d, f, Corth): elif isinstance(f, numpy.ndarray) and f.ndim == 3 and s.ndim == 3: errvec = [] for i in range(f.shape[0]): - sdf = reduce(numpy.dot, (Corth[i].conj().T, s[i], d[i], f[i], Corth[i])) + sdf = reduce(lib.dot, (Corth[i].conj().T, s[i], d[i], f[i], Corth[i])) if orbsym is not None: sdf[sym_forbid] = 0 errvec.append((sdf.conj().T - sdf).ravel()) diff --git a/pyscf/scf/hf.py b/pyscf/scf/hf.py index b6ecb5ace0..7a8c0e8f22 100644 --- a/pyscf/scf/hf.py +++ b/pyscf/scf/hf.py @@ -115,8 +115,10 @@ def kernel(mf, conv_tol=1e-10, conv_tol_grad=None, logger.info(mf, 'Set gradient conv threshold to %g', conv_tol_grad) mol = mf.mol + s1e = mf.get_ovlp(mol) + if dm0 is None: - dm = mf.get_init_guess(mol, mf.init_guess) + dm = mf.get_init_guess(mol, mf.init_guess, s1e=s1e) 
else: dm = dm0 @@ -128,13 +130,6 @@ def kernel(mf, conv_tol=1e-10, conv_tol_grad=None, scf_conv = False mo_energy = mo_coeff = mo_occ = None - s1e = mf.get_ovlp(mol) - cond = lib.cond(s1e) - logger.debug(mf, 'cond(S) = %s', cond) - if numpy.max(cond)*1e-17 > conv_tol: - logger.warn(mf, 'Singularity detected in overlap matrix (condition number = %4.3g). ' - 'SCF may be inaccurate and hard to converge.', numpy.max(cond)) - # Skip SCF iterations. Compute only the total energy of the initial density if mf.max_cycle <= 0: fock = mf.get_fock(h1e, s1e, vhf, dm) # = h1e + vhf, no DIIS @@ -722,14 +717,14 @@ def fproj(mo): return dm -def get_init_guess(mol, key='minao'): +def get_init_guess(mol, key='minao', **kwargs): '''Generate density matrix for initial guess Kwargs: key : str One of 'minao', 'atom', 'huckel', 'hcore', '1e', 'chkfile'. ''' - return RHF(mol).get_init_guess(mol, key) + return RHF(mol).get_init_guess(mol, key, **kwargs) # eigenvalue of d is 1 @@ -752,7 +747,7 @@ def level_shift(s, d, f, factor): Returns: New Fock matrix, 2D ndarray ''' - dm_vir = s - reduce(numpy.dot, (s, d, s)) + dm_vir = s - reduce(lib.dot, (s, d, s)) return f + dm_vir * factor @@ -1570,6 +1565,15 @@ def __init__(self, mol): self._opt = {None: None} self._eri = None # Note: self._eri requires large amount of memory + def check_sanity(self): + s1e = self.get_ovlp() + cond = lib.cond(s1e) + logger.debug(self, 'cond(S) = %s', cond) + if numpy.max(cond)*1e-17 > self.conv_tol: + logger.warn(self, 'Singularity detected in overlap matrix (condition number = %4.3g). 
' + 'SCF may be inaccurate and hard to converge.', numpy.max(cond)) + return super().check_sanity() + def build(self, mol=None): if mol is None: mol = self.mol if self.verbose >= logger.WARN: @@ -1704,7 +1708,7 @@ def from_chk(self, chkfile=None, project=None): return self.init_guess_by_chkfile(chkfile, project) from_chk.__doc__ = init_guess_by_chkfile.__doc__ - def get_init_guess(self, mol=None, key='minao'): + def get_init_guess(self, mol=None, key='minao', **kwargs): if not isinstance(key, str): return key @@ -1742,7 +1746,7 @@ def get_init_guess(self, mol=None, key='minao'): energy_tot = energy_tot def energy_nuc(self): - return self.mol.energy_nuc() + return self.mol.enuc # A hook for overloading convergence criteria in SCF iterations. Assigning # a function @@ -2103,8 +2107,8 @@ def check_sanity(self): mol.nelectron) return SCF.check_sanity(self) - def get_init_guess(self, mol=None, key='minao'): - dm = SCF.get_init_guess(self, mol, key) + def get_init_guess(self, mol=None, key='minao', **kwargs): + dm = SCF.get_init_guess(self, mol, key, **kwargs) if self.verbose >= logger.DEBUG1: s = self.get_ovlp() nelec = numpy.einsum('ij,ji', dm, s).real diff --git a/pyscf/scf/uhf.py b/pyscf/scf/uhf.py index 4f07335bd6..0afc66d0ba 100644 --- a/pyscf/scf/uhf.py +++ b/pyscf/scf/uhf.py @@ -130,8 +130,8 @@ def _break_dm_spin_symm(mol, dm): dmb[...,p0:p1,p0:p1] = dma[...,p0:p1,p0:p1] return dma, dmb -def get_init_guess(mol, key='minao'): - return UHF(mol).get_init_guess(mol, key) +def get_init_guess(mol, key='minao', **kwargs): + return UHF(mol).get_init_guess(mol, key, **kwargs) def make_rdm1(mo_coeff, mo_occ, **kwargs): '''One-particle density matrix in AO representation @@ -830,8 +830,8 @@ def make_rdm2(self, mo_coeff=None, mo_occ=None, **kwargs): energy_elec = energy_elec - def get_init_guess(self, mol=None, key='minao'): - dm = hf.SCF.get_init_guess(self, mol, key) + def get_init_guess(self, mol=None, key='minao', **kwargs): + dm = hf.SCF.get_init_guess(self, mol, key, 
**kwargs) if self.verbose >= logger.DEBUG1: s = self.get_ovlp() nelec =(numpy.einsum('ij,ji', dm[0], s).real, From ce69d48e16996f25236fd1ca4b60e062f37c8369 Mon Sep 17 00:00:00 2001 From: sunchong137 Date: Sun, 25 Feb 2024 19:26:51 -0600 Subject: [PATCH 16/44] Fix smearing with predefined chemical potential (#2098) * added hubbard model with UHF example, fixed chemical potential bug in addons.py * fix pbc smearing * add test for smearing with mu0 --------- Co-authored-by: fishjojo --- examples/scf/72-hubbard_finite_temp.py | 44 ++++++++++++++++++++++++++ pyscf/pbc/scf/addons.py | 16 +++++++--- pyscf/scf/addons.py | 18 +++++++---- pyscf/scf/test/test_addons.py | 28 ++++++++++++++++ 4 files changed, 95 insertions(+), 11 deletions(-) create mode 100644 examples/scf/72-hubbard_finite_temp.py diff --git a/examples/scf/72-hubbard_finite_temp.py b/examples/scf/72-hubbard_finite_temp.py new file mode 100644 index 0000000000..9033191baf --- /dev/null +++ b/examples/scf/72-hubbard_finite_temp.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# +# Author: Chong Sun +# + +''' +Simulate model systems with HF. +Half-filled Hubbard model. 
+''' + +from pyscf import gto, scf , ao2mo +import numpy + +def _hubbard_hamilts_pbc(L, U): + h1e = numpy.zeros((L, L)) + g2e = numpy.zeros((L,)*4) + for i in range(L): + h1e[i, (i+1)%L] = h1e[(i+1)%L, i] = -1 + g2e[i, i, i, i] = U + return h1e, g2e + +L = 10 +U = 4 + +mol = gto.M() +mol.nelectron = L +mol.nao = L +mol.spin = 0 +mol.incore_anyway = True +mol.build() + +# set hamiltonian +h1e, eri = _hubbard_hamilts_pbc(L, U) +mf = scf.UHF(mol) +mf.get_hcore = lambda *args: h1e +mf._eri = ao2mo.restore(1, eri, L) +mf.get_ovlp = lambda *args: numpy.eye(L) +mf.kernel() + +# finite temperature +from pyscf.scf import addons +beta = 1 +mf_ft = addons.smearing(mf, sigma=1./beta, method='fermi', fix_spin=True) +mf_ft.kernel() diff --git a/pyscf/pbc/scf/addons.py b/pyscf/pbc/scf/addons.py index 4a726c25fc..474833fd98 100644 --- a/pyscf/pbc/scf/addons.py +++ b/pyscf/pbc/scf/addons.py @@ -121,12 +121,17 @@ def get_occ(self, mo_energy_kpts=None, mo_coeff_kpts=None): if self.mu0 is None: mu_a, occa = mol_addons._smearing_optimize(f_occ, mo_es[0], nocc[0], sigma) mu_b, occb = mol_addons._smearing_optimize(f_occ, mo_es[1], nocc[1], sigma) - mu = [mu_a, mu_b] - mo_occs = [occa, occb] else: - mu = self.mu0 - mo_occs = f_occ(mu[0], mo_es[0], sigma) - mo_occs = f_occ(mu[1], mo_es[1], sigma) + if numpy.isscalar(self.mu0): + mu_a = mu_b = self.mu0 + elif len(self.mu0) == 2: + mu_a, mu_b = self.mu0 + else: + raise TypeError(f'Unsupported mu0: {self.mu0}') + occa = f_occ(mu_a, mo_es[0], sigma) + occb = f_occ(mu_b, mo_es[1], sigma) + mu = [mu_a, mu_b] + mo_occs = [occa, occb] self.entropy = self._get_entropy(mo_es[0], mo_occs[0], mu[0]) self.entropy += self._get_entropy(mo_es[1], mo_occs[1], mu[1]) self.entropy /= nkpts @@ -163,6 +168,7 @@ def get_occ(self, mo_energy_kpts=None, mo_coeff_kpts=None): else: # If mu0 is given, fix mu instead of electron number. 
XXX -Chong Sun mu = self.mu0 + assert numpy.isscalar(mu) mo_occs = f_occ(mu, mo_es, sigma) self.entropy = self._get_entropy(mo_es, mo_occs, mu) / nkpts if is_rhf: diff --git a/pyscf/scf/addons.py b/pyscf/scf/addons.py index 213d11721a..a120087f33 100644 --- a/pyscf/scf/addons.py +++ b/pyscf/scf/addons.py @@ -140,12 +140,17 @@ def get_occ(self, mo_energy=None, mo_coeff=None): if self.mu0 is None: mu_a, occa = _smearing_optimize(f_occ, mo_es[0], nocc[0], sigma) mu_b, occb = _smearing_optimize(f_occ, mo_es[1], nocc[1], sigma) - mu = [mu_a, mu_b] - mo_occs = [occa, occb] else: - mu = self.mu0 - mo_occs = f_occ(mu[0], mo_es[0], sigma) - mo_occs = f_occ(mu[1], mo_es[1], sigma) + if numpy.isscalar(self.mu0): + mu_a = mu_b = self.mu0 + elif len(self.mu0) == 2: + mu_a, mu_b = self.mu0 + else: + raise TypeError(f'Unsupported mu0: {self.mu0}') + occa = f_occ(mu_a, mo_es[0], sigma) + occb = f_occ(mu_b, mo_es[1], sigma) + mu = [mu_a, mu_b] + mo_occs = [occa, occb] self.entropy = self._get_entropy(mo_es[0], mo_occs[0], mu[0]) self.entropy += self._get_entropy(mo_es[1], mo_occs[1], mu[1]) fermi = (_get_fermi(mo_es[0], nocc[0]), _get_fermi(mo_es[1], nocc[1])) @@ -163,7 +168,7 @@ def get_occ(self, mo_energy=None, mo_coeff=None): if is_rohf: mo_occs = mo_occs[0] + mo_occs[1] else: # all orbitals treated with the same fermi level - nocc = nelectron = self.mol.tot_electrons() + nocc = nelectron = self.mol.nelectron if is_uhf: mo_es = numpy.hstack(mo_energy) else: @@ -176,6 +181,7 @@ def get_occ(self, mo_energy=None, mo_coeff=None): else: # If mu0 is given, fix mu instead of electron number. 
XXX -Chong Sun mu = self.mu0 + assert numpy.isscalar(mu) mo_occs = f_occ(mu, mo_es, sigma) self.entropy = self._get_entropy(mo_es, mo_occs, mu) if is_rhf: diff --git a/pyscf/scf/test/test_addons.py b/pyscf/scf/test/test_addons.py index f0d0da2ec6..88595191cd 100644 --- a/pyscf/scf/test/test_addons.py +++ b/pyscf/scf/test/test_addons.py @@ -459,6 +459,34 @@ def test_rohf_smearing(self): self.assertAlmostEqual(myhf_s.e_tot, -243.086989253, 5) self.assertAlmostEqual(myhf_s.entropy, 17.11431, 4) + def test_smearing_mu0(self): + def _hubbard_hamilts_pbc(L, U): + h1e = numpy.zeros((L, L)) + g2e = numpy.zeros((L,)*4) + for i in range(L): + h1e[i, (i+1)%L] = h1e[(i+1)%L, i] = -1 + g2e[i, i, i, i] = U + return h1e, g2e + + L = 10 + U = 4 + + mol = gto.M() + mol.nelectron = L + mol.nao = L + mol.incore_anyway = True + mol.build() + + h1e, eri = _hubbard_hamilts_pbc(L, U) + mf = scf.UHF(mol) + mf.get_hcore = lambda *args: h1e + mf._eri = eri + mf.get_ovlp = lambda *args: numpy.eye(L) + mf_ft = addons.smearing(mf, sigma=.1, mu0=2., fix_spin=True) + mf_ft.kernel() + self.assertAlmostEqual(mf_ft.e_tot, -2.93405853397115, 5) + self.assertAlmostEqual(mf_ft.entropy, 0.11867520273160392, 5) + if __name__ == "__main__": print("Full Tests for addons") unittest.main() From fb49e40667f5d145074406c46047a0556dc94065 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Mon, 26 Feb 2024 14:46:42 -0800 Subject: [PATCH 17/44] Add tests for ndarray_pointer_2d function (#2101) * Add tests for ndarray_pointer_2d function * Fix ndarray_pointer_2d --- pyscf/lib/numpy_helper.py | 10 ++++------ pyscf/lib/test/test_numpy_helper.py | 5 +++++ pyscf/pbc/dft/multigrid/multigrid_pair.py | 4 ++-- pyscf/pbc/gto/cell.py | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pyscf/lib/numpy_helper.py b/pyscf/lib/numpy_helper.py index 58508d9f8b..e05e687f1e 100644 --- a/pyscf/lib/numpy_helper.py +++ b/pyscf/lib/numpy_helper.py @@ -1117,15 +1117,13 @@ def expm(a): return y def 
ndarray_pointer_2d(array): - '''Get the C pointer of a 2D array + '''Return an array that contains the addresses of the first element in each + row of the input 2d array. ''' assert array.ndim == 2 assert array.flags.c_contiguous - - ptr = (array.ctypes.data + - numpy.arange(array.shape[0])*array.strides[0]).astype(numpy.uintp) - ptr = ptr.ctypes.data_as(ctypes.c_void_p) - return ptr + i = numpy.arange(array.shape[0]) + return array.ctypes.data + (i * array.strides[0]).astype(numpy.uintp) class NPArrayWithTag(numpy.ndarray): # Initialize kwargs in function tag_array diff --git a/pyscf/lib/test/test_numpy_helper.py b/pyscf/lib/test/test_numpy_helper.py index 93e698f354..0b9ca0ec57 100644 --- a/pyscf/lib/test/test_numpy_helper.py +++ b/pyscf/lib/test/test_numpy_helper.py @@ -225,6 +225,11 @@ def test_split_reshape(self): self.assertRaises(ValueError, lib.split_reshape, numpy.arange(3), ((2,2),)) self.assertRaises(ValueError, lib.split_reshape, numpy.arange(3), (2,2)) + def test_ndarray_pointer_2d(self): + a = numpy.eye(3) + addr = lib.ndarray_pointer_2d(a) + self.assertTrue(all(addr == a.ctypes.data + numpy.array([0, 24, 48]))) + if __name__ == "__main__": print("Full Tests for numpy_helper") unittest.main() diff --git a/pyscf/pbc/dft/multigrid/multigrid_pair.py b/pyscf/pbc/dft/multigrid/multigrid_pair.py index 3ef43b688d..82068b7d93 100644 --- a/pyscf/pbc/dft/multigrid/multigrid_pair.py +++ b/pyscf/pbc/dft/multigrid/multigrid_pair.py @@ -313,12 +313,12 @@ def build_task_list(cell, gridlevel_info, cell1=None, Ls=None, hermi=0, precisio ish_bas.ctypes.data_as(ctypes.c_void_p), ish_env.ctypes.data_as(ctypes.c_void_p), ish_rcut.ctypes.data_as(ctypes.c_void_p), - ptr_ipgf_rcut, + ptr_ipgf_rcut.ctypes, jsh_atm.ctypes.data_as(ctypes.c_void_p), jsh_bas.ctypes.data_as(ctypes.c_void_p), jsh_env.ctypes.data_as(ctypes.c_void_p), jsh_rcut.ctypes.data_as(ctypes.c_void_p), - ptr_jpgf_rcut, + ptr_jpgf_rcut.ctypes, ctypes.c_int(nish), ctypes.c_int(njsh), 
Ls.ctypes.data_as(ctypes.c_void_p), ctypes.c_double(precision), ctypes.c_int(hermi)) diff --git a/pyscf/pbc/gto/cell.py b/pyscf/pbc/gto/cell.py index 872fda36e2..15b4fa26d9 100644 --- a/pyscf/pbc/gto/cell.py +++ b/pyscf/pbc/gto/cell.py @@ -995,7 +995,7 @@ def rcut_by_shells(cell, precision=None, rcut=0, nprim = bas[:,mole.NPRIM_OF].max() # be careful that the unused memory blocks are not initialized pgf_radius = np.empty((nbas,nprim), order='C', dtype=np.double) - ptr_pgf_radius = lib.ndarray_pointer_2d(pgf_radius) + ptr_pgf_radius = lib.ndarray_pointer_2d(pgf_radius).ctypes else: ptr_pgf_radius = lib.c_null_ptr() fn = getattr(libpbc, 'rcut_by_shells', None) From e2cc8c136ed0e57f8597ce3a06e66f97630916ac Mon Sep 17 00:00:00 2001 From: Victor Yu Date: Wed, 28 Feb 2024 00:16:25 -0600 Subject: [PATCH 18/44] Fix transform_ci for more than 64 orbitals (#2095) * Fix transform_ci for more than 64 orbitals * Separate occ_masks into a function --- pyscf/fci/addons.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pyscf/fci/addons.py b/pyscf/fci/addons.py index 95fda3f6ce..3f794f1a63 100644 --- a/pyscf/fci/addons.py +++ b/pyscf/fci/addons.py @@ -658,22 +658,15 @@ def transform_ci(ci, nelec, u): nb_new = cistring.num_strings(norb_new, nelecb) ci = ci.reshape(na_old, nb_old) - one_particle_strs_old = numpy.asarray([1 << i for i in range(norb_old)]) - one_particle_strs_new = numpy.asarray([1 << i for i in range(norb_new)]) - if neleca == 0: trans_ci_a = numpy.ones((1, 1)) else: trans_ci_a = numpy.zeros((na_old, na_new), dtype=ua.dtype) - strs_old = numpy.asarray(cistring.make_strings(range(norb_old), neleca)) - - # Unitary transformation array trans_ci is the overlap between two sets of CI basis. 
- occ_masks_old = (strs_old[:,None] & one_particle_strs_old) != 0 + occ_masks_old = _init_occ_masks(norb_old, neleca, na_old) if norb_old == norb_new: occ_masks_new = occ_masks_old else: - strs_new = numpy.asarray(cistring.make_strings(range(norb_new), neleca)) - occ_masks_new = (strs_new[:,None] & one_particle_strs_new) != 0 + occ_masks_new = _init_occ_masks(norb_new, neleca, na_new) # Perform #for i in range(na_old): # old basis @@ -692,14 +685,11 @@ def transform_ci(ci, nelec, u): trans_ci_b = numpy.ones((1, 1)) else: trans_ci_b = numpy.zeros((nb_old, nb_new), dtype=ub.dtype) - strs_old = numpy.asarray(cistring.make_strings(range(norb_old), nelecb)) - - occ_masks_old = (strs_old[:,None] & one_particle_strs_old) != 0 + occ_masks_old = _init_occ_masks(norb_old, nelecb, nb_old) if norb_old == norb_new: occ_masks_new = occ_masks_old else: - strs_new = numpy.asarray(cistring.make_strings(range(norb_new), nelecb)) - occ_masks_new = (strs_new[:,None] & one_particle_strs_new) != 0 + occ_masks_new = _init_occ_masks(norb_new, nelecb, nb_new) occ_idx_all_strs = numpy.where(occ_masks_new)[1].reshape(nb_new,nelecb) for i in range(nb_old): @@ -725,4 +715,17 @@ def _unpack_nelec(nelec, spin=None): nelec = neleca, nelecb return nelec +def _init_occ_masks(norb, nelec, nci): + one_particle_strs = numpy.asarray(cistring.make_strings(range(norb), 1)) + strs = numpy.asarray(cistring.make_strings(range(norb), nelec)) + if norb < 64: + occ_masks = (strs[:,None] & one_particle_strs) != 0 + else: + occ_masks = numpy.zeros((nci, norb), dtype=bool) + for i in range(nci): + for j in range(norb): + if one_particle_strs[j][0] in strs[i]: + occ_masks[i,j] = True + return occ_masks + del (LARGE_CI_TOL, RETURN_STRS, PENALTY) From a40064009cd3865bce6315d9f87323340e3f343c Mon Sep 17 00:00:00 2001 From: Xubo Wang Date: Wed, 28 Feb 2024 14:39:18 -0500 Subject: [PATCH 19/44] fix binomial function in fci_string --- pyscf/lib/mcscf/fci_string.c | 7 +------ 1 file changed, 1 insertion(+), 6 
deletions(-) diff --git a/pyscf/lib/mcscf/fci_string.c b/pyscf/lib/mcscf/fci_string.c index adae90da46..5ef53b5161 100644 --- a/pyscf/lib/mcscf/fci_string.c +++ b/pyscf/lib/mcscf/fci_string.c @@ -130,12 +130,7 @@ static int binomial(int n, int m) if (m*2 <= n) { m = n - m; } - uint64_t i; - uint64_t val = 1; - for (i = m; i <= n; i++) { - val *= i; - val /= i - m; - } + int val = binomial(n-1,m-1) + binomial(n-1,m); return val; } } From 93898c2b17c56f8632d1a1c3471df6ab97e88073 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Thu, 7 Mar 2024 10:39:36 -0800 Subject: [PATCH 20/44] Fix xcfun high-order derivatives --- pyscf/dft/test/test_xc_deriv.py | 44 ++++++++++++++++++++++++++++++--- pyscf/dft/xc_deriv.py | 7 +++--- pyscf/lib/libxcfun.patch | 6 ++--- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/pyscf/dft/test/test_xc_deriv.py b/pyscf/dft/test/test_xc_deriv.py index b6e430ffc2..ee976141b1 100644 --- a/pyscf/dft/test/test_xc_deriv.py +++ b/pyscf/dft/test/test_xc_deriv.py @@ -419,11 +419,11 @@ def test_libxc_mgga_deriv3(self): def test_libxc_gga_deriv4(self): rho1 = rho[:,:4].copy() xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=4) - self.assertAlmostEqual(xc1.sum(), -920.135878252819, 4) + self.assertAlmostEqual(xc1.sum(), -1141.356286780069, 1) rho1 = rho[1,:4].copy() xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=4) - self.assertAlmostEqual(xc1.sum(), -869.6617638095072, 4) + self.assertAlmostEqual(xc1.sum(), -615.116081052867, 1) @unittest.skipIf(not hasattr(dft, 'xcfun'), 'xcfun order') def test_xcfun_lda_deriv3(self): @@ -525,11 +525,47 @@ def test_xcfun_mgga_deriv3(self): def test_xcfun_gga_deriv4(self): rho1 = rho[:,:4].copy() xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=4) - self.assertAlmostEqual(xc1.sum(), -920.135878252819, 9) + self.assertAlmostEqual(xc1.sum(), -1141.356286780069, 9) rho1 = rho[1,:4].copy() xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=4) - self.assertAlmostEqual(xc1.sum(), -869.6617638095072, 9) + 
self.assertAlmostEqual(xc1.sum(), -615.116081052867, 9) + + @unittest.skipIf(not (hasattr(dft, 'xcfun') and dft.xcfun.MAX_DERIV_ORDER > 3), 'xcfun order') + def test_xcfun_gga_deriv4_finite_diff(self): + xctype = 'GGA' + deriv = 4 + nvar = 4 + delta = 1e-6 + + spin = 1 + rhop = rho[:,:nvar].copy() + xcp = dft.xcfun.eval_xc1('pbe,', rhop, spin, deriv=deriv) + lxc = xc_deriv.transform_xc(rhop, xcp, xctype, spin,4) + for s in (0, 1): + for t in range(nvar): + rhop = rho[:,:nvar].copy() + rhop[s,t] += delta * .5 + xcp = dft.xcfun.eval_xc1('pbe,', rhop, spin, deriv=deriv-1) + kxc0 = xc_deriv.transform_xc(rhop, xcp, xctype, spin, deriv-1) + rhop[s,t] -= delta + xcp = dft.xcfun.eval_xc1('pbe,', rhop, spin, deriv=deriv-1) + kxc1 = xc_deriv.transform_xc(rhop, xcp, xctype, spin, deriv-1) + self.assertAlmostEqual(abs((kxc0-kxc1)/delta - lxc[s,t]).max(), 0, 7) + + spin = 0 + rhop = rho[0,:nvar].copy() + xcp = dft.xcfun.eval_xc1('b88,', rhop, spin, deriv=deriv) + lxc = xc_deriv.transform_xc(rhop, xcp, xctype, spin,4) + for t in range(nvar): + rhop = rho[0,:nvar].copy() + rhop[t] += delta * .5 + xcp = dft.xcfun.eval_xc1('b88,', rhop, spin, deriv=deriv-1) + kxc0 = xc_deriv.transform_xc(rhop, xcp, xctype, spin, deriv-1) + rhop[t] -= delta + xcp = dft.xcfun.eval_xc1('b88,', rhop, spin, deriv=deriv-1) + kxc1 = xc_deriv.transform_xc(rhop, xcp, xctype, spin, deriv-1) + self.assertAlmostEqual(abs((kxc0-kxc1)/delta - lxc[t]).max(), 0, 7) if __name__ == "__main__": print("Test xc_deriv") diff --git a/pyscf/dft/xc_deriv.py b/pyscf/dft/xc_deriv.py index 59d4c4d905..dd8df01397 100644 --- a/pyscf/dft/xc_deriv.py +++ b/pyscf/dft/xc_deriv.py @@ -580,6 +580,7 @@ def transform_xc(rho, xc_val, xctype, spin, order): [dim_lst[i] for i in pair_comb] + rest_dims) xc_tensor_1[diag_idx] += xc_sub else: + i3to2x2 = _product_uniq_indices(2, 2) for n_pairs in range(1, order//2+1): p0, p1 = offsets[order-n_pairs:order-n_pairs+2] xc_sub = _unfold_gga(rho, xc_val[p0:p1], spin, order-n_pairs, @@ -589,9 +590,9 
@@ def transform_xc(rho, xc_val, xctype, spin, order): for i in range(n_pairs): xc_sub[(slice(None),)*i+(0,)] *= 2 xc_sub[(slice(None),)*i+(2,)] *= 2 - sigma_idx = _product_uniq_indices(2, n_pairs*2) - xc_sub = xc_sub.reshape((3**n_pairs,) + xc_sub.shape[n_pairs:]) - xc_sub = xc_sub[sigma_idx] + sigma_idx = (i3to2x2[(slice(None),)*2 + (np.newaxis,)*(i*2)] + for i in reversed(range(n_pairs))) + xc_sub = xc_sub[tuple(sigma_idx)] low_sigmas = itertools.combinations(range(order), n_pairs*2) pair_combs = [list(itertools.chain(*p[::-1])) diff --git a/pyscf/lib/libxcfun.patch b/pyscf/lib/libxcfun.patch index 04e2a2a245..c49d1425e7 100644 --- a/pyscf/lib/libxcfun.patch +++ b/pyscf/lib/libxcfun.patch @@ -36,7 +36,7 @@ index 239cef5..6288e40 100644 + ttype in[XC_MAX_INVARS], out = 0; + for (int i = 0; i < inlen; i++) + in[i] = input[i]; -+ int k = 1 + inlen + (inlen * (inlen + 1)) / 2; ++ int k = (inlen + 1) * (inlen + 2) * (inlen + 3) * (inlen + 4) / 24; // comb(deriv-1+inlen, deriv-1); + for (int i = 0; i < inlen; i++) { + in[i].set(VAR0, 1); + for (int j = i; j < inlen; j++) { @@ -46,7 +46,7 @@ index 239cef5..6288e40 100644 + for (int s1 = s; s1 < inlen; s1++) { + in[s1].set(VAR3, 1); + for (int s2 = s1; s2 < inlen; s2++) { -+ in[s1].set(VAR4, 1); ++ in[s2].set(VAR4, 1); + densvars d(fun, in); + out = 0; + for (int n = 0; n < fun->nr_active_functionals; n++) @@ -71,7 +71,7 @@ index 239cef5..6288e40 100644 + ttype in[XC_MAX_INVARS], out = 0; + for (int i = 0; i < inlen; i++) + in[i] = input[i]; -+ int k = 1 + inlen + (inlen * (inlen + 1)) / 2; ++ int k = (inlen + 1) * (inlen + 2) * (inlen + 3) / 6; // comb(deriv-1+inlen, deriv-1); + for (int i = 0; i < inlen; i++) { + in[i].set(VAR0, 1); + for (int j = i; j < inlen; j++) { From bd59bf629bfee6dcafe35969760d582cec79afcf Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Mon, 11 Mar 2024 22:24:29 -0700 Subject: [PATCH 21/44] More type checks in fci solver (#2122) * float type checks * Update mcscf docstring (issue #2119) * Fix 
failed tests --- pyscf/fci/direct_nosym.py | 101 +++++++++++++++++--------------------- pyscf/fci/direct_spin0.py | 6 ++- pyscf/fci/direct_spin1.py | 12 +++-- pyscf/mcscf/__init__.py | 24 ++------- pyscf/mcscf/mc1step.py | 10 ++-- 5 files changed, 66 insertions(+), 87 deletions(-) diff --git a/pyscf/fci/direct_nosym.py b/pyscf/fci/direct_nosym.py index 5befc399cc..fcd67f5664 100644 --- a/pyscf/fci/direct_nosym.py +++ b/pyscf/fci/direct_nosym.py @@ -49,7 +49,8 @@ def contract_1e(h1e, fcivec, norb, nelec, link_index=None): na, nlinka = link_indexa.shape[:2] nb, nlinkb = link_indexb.shape[:2] - assert (fcivec.size == na*nb) + assert fcivec.size == na*nb + assert fcivec.dtype == h1e.dtype == numpy.float64 ci1 = numpy.zeros_like(fcivec) libfci.FCIcontract_a_1e_nosym(h1e.ctypes.data_as(ctypes.c_void_p), @@ -95,30 +96,47 @@ def contract_2e(eri, fcivec, norb, nelec, link_index=None): See also :func:`direct_nosym.absorb_h1e` ''' - fcivec = numpy.asarray(fcivec, order='C') link_indexa, link_indexb = _unpack(norb, nelec, link_index) - na, nlinka = link_indexa.shape[:2] nb, nlinkb = link_indexb.shape[:2] - assert (fcivec.size == na*nb) - ci1 = numpy.empty_like(fcivec) - - libfci.FCIcontract_2es1(eri.ctypes.data_as(ctypes.c_void_p), - fcivec.ctypes.data_as(ctypes.c_void_p), - ci1.ctypes.data_as(ctypes.c_void_p), - ctypes.c_int(norb), - ctypes.c_int(na), ctypes.c_int(nb), - ctypes.c_int(nlinka), ctypes.c_int(nlinkb), - link_indexa.ctypes.data_as(ctypes.c_void_p), - link_indexb.ctypes.data_as(ctypes.c_void_p)) - return ci1.view(direct_spin1.FCIvector) + assert fcivec.size == na*nb + if fcivec.dtype == eri.dtype == numpy.float64: + fcivec = numpy.asarray(fcivec, order='C') + eri = numpy.asarray(eri, order='C') + ci1 = numpy.empty_like(fcivec) + libfci.FCIcontract_2es1(eri.ctypes.data_as(ctypes.c_void_p), + fcivec.ctypes.data_as(ctypes.c_void_p), + ci1.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(norb), + ctypes.c_int(na), ctypes.c_int(nb), + ctypes.c_int(nlinka), 
ctypes.c_int(nlinkb), + link_indexa.ctypes.data_as(ctypes.c_void_p), + link_indexb.ctypes.data_as(ctypes.c_void_p)) + return ci1.view(direct_spin1.FCIvector) + + ciR = numpy.asarray(fcivec.real, order='C') + ciI = numpy.asarray(fcivec.imag, order='C') + eriR = numpy.asarray(eri.real, order='C') + eriI = numpy.asarray(eri.imag, order='C') + link_index = (link_indexa, link_indexb) + outR = contract_2e(eriR, ciR, norb, nelec, link_index=link_index) + outR -= contract_2e(eriI, ciI, norb, nelec, link_index=link_index) + outI = contract_2e(eriR, ciI, norb, nelec, link_index=link_index) + outI += contract_2e(eriI, ciR, norb, nelec, link_index=link_index) + out = outR.astype(numpy.complex128) + out.imag = outI + return outR def absorb_h1e(h1e, eri, norb, nelec, fac=1): '''Modify 2e Hamiltonian to include 1e Hamiltonian contribution. ''' if not isinstance(nelec, (int, numpy.number)): nelec = sum(nelec) - h2e = ao2mo.restore(1, eri.copy(), norb).astype(h1e.dtype, copy=False) + if h1e.dtype == eri.dtype == numpy.float64: + h2e = ao2mo.restore(1, eri.copy(), norb) + else: + assert eri.ndim == 4 + h2e = eri.astype(dtype=numpy.result_type(h1e, eri), copy=True) f1e = h1e - numpy.einsum('jiik->jk', h2e) * .5 f1e = f1e * (1./(nelec+1e-100)) for k in range(norb): @@ -133,7 +151,12 @@ def energy(h1e, eri, fcivec, norb, nelec, link_index=None): ci1 = contract_2e(h2e, fcivec, norb, nelec, link_index) return numpy.dot(fcivec.reshape(-1), ci1.reshape(-1)) -make_hdiag = direct_spin1.make_hdiag +def make_hdiag(h1e, eri, norb, nelec, compress=False): + if h1e.dtype == numpy.complex128: + h1e = h1e.real.copy() + if eri.dtype == numpy.complex128: + eri = eri.real.copy() + return direct_spin1.make_hdiag(h1e, eri, norb, nelec, compress) class FCISolver(direct_spin1.FCISolver): @@ -151,6 +174,10 @@ def contract_2e(self, eri, fcivec, norb, nelec, link_index=None): def absorb_h1e(self, h1e, eri, norb, nelec, fac=1): return absorb_h1e(h1e, eri, norb, nelec, fac) + def make_hdiag(self, h1e, eri, 
norb, nelec, compress=False): + nelec = direct_spin1._unpack_nelec(nelec, self.spin) + return make_hdiag(h1e, eri, norb, nelec, compress) + def kernel(self, h1e, eri, norb, nelec, ci0=None, tol=None, lindep=None, max_cycle=None, max_space=None, nroots=None, davidson_only=None, pspace_size=None, @@ -160,6 +187,7 @@ def kernel(self, h1e, eri, norb, nelec, ci0=None, neleca = nelec - nelecb else: neleca, nelecb = nelec + davidson_only = True link_indexa = cistring.gen_linkstr_index(range(norb), neleca) link_indexb = cistring.gen_linkstr_index(range(norb), nelecb) e, c = direct_spin1.kernel_ms1(self, h1e, eri, norb, nelec, ci0, @@ -206,42 +234,3 @@ def _unpack(norb, nelec, link_index): return link_indexa, link_indexb else: return link_index - - -if __name__ == '__main__': - from functools import reduce - from pyscf import gto - from pyscf import scf - - mol = gto.Mole() - mol.verbose = 0 - mol.output = None#"out_h2o" - mol.atom = [ - ['H', ( 1.,-1. , 0. )], - ['H', ( 0.,-1. ,-1. )], - ['H', ( 1.,-0.5 ,-1. )], - #['H', ( 0.,-0.5 ,-1. )], - #['H', ( 0.,-0.5 ,-0. )], - ['H', ( 0.,-0. ,-1. )], - ['H', ( 1.,-0.5 , 0. )], - ['H', ( 0., 1. , 1. )], - ] - - mol.basis = {'H': 'sto-3g'} - mol.build() - - m = scf.RHF(mol) - ehf = m.scf() - - cis = FCISolver(mol) - norb = m.mo_coeff.shape[1] - nelec = mol.nelectron - 2 - h1e = reduce(numpy.dot, (m.mo_coeff.T, m.get_hcore(), m.mo_coeff)) - eri = ao2mo.incore.general(m._eri, (m.mo_coeff,)*4, compact=False) - eri = eri.reshape(norb,norb,norb,norb) - nea = nelec//2 + 1 - neb = nelec//2 - 1 - nelec = (nea, neb) - - e1 = cis.kernel(h1e, eri, norb, nelec)[0] - print(e1, e1 - -7.7466756526056004) diff --git a/pyscf/fci/direct_spin0.py b/pyscf/fci/direct_spin0.py index 46a21fd790..5d80bc87ec 100644 --- a/pyscf/fci/direct_spin0.py +++ b/pyscf/fci/direct_spin0.py @@ -59,7 +59,8 @@ def contract_1e(f1e, fcivec, norb, nelec, link_index=None): # Handle computability. 
link_index should be (nparray, nparray) link_index = link_index[0] na, nlink = link_index.shape[:2] - assert (fcivec.size == na**2) + assert fcivec.size == na**2 + assert fcivec.dtype == f1e.dtype == numpy.float64 ci1 = numpy.empty_like(fcivec) f1e_tril = lib.pack_tril(f1e) libfci.FCIcontract_1e_spin0(f1e_tril.ctypes.data_as(ctypes.c_void_p), @@ -92,7 +93,8 @@ def contract_2e(eri, fcivec, norb, nelec, link_index=None): # Handle computability. link_index should be (nparray, nparray) link_index = link_index[0] na, nlink = link_index.shape[:2] - assert (fcivec.size == na**2) + assert fcivec.size == na**2 + assert fcivec.dtype == eri.dtype == numpy.float64 ci1 = numpy.empty((na,na)) libfci.FCIcontract_2e_spin0(eri.ctypes.data_as(ctypes.c_void_p), diff --git a/pyscf/fci/direct_spin1.py b/pyscf/fci/direct_spin1.py index 4b5b5a4946..5c63cfdf59 100644 --- a/pyscf/fci/direct_spin1.py +++ b/pyscf/fci/direct_spin1.py @@ -63,7 +63,8 @@ def contract_1e(f1e, fcivec, norb, nelec, link_index=None): link_indexa, link_indexb = _unpack(norb, nelec, link_index) na, nlinka = link_indexa.shape[:2] nb, nlinkb = link_indexb.shape[:2] - assert (fcivec.size == na*nb) + assert fcivec.size == na*nb + assert fcivec.dtype == f1e.dtype == numpy.float64 f1e_tril = lib.pack_tril(f1e) ci1 = numpy.zeros_like(fcivec) libfci.FCIcontract_a_1e(f1e_tril.ctypes.data_as(ctypes.c_void_p), @@ -123,11 +124,12 @@ def contract_2e(eri, fcivec, norb, nelec, link_index=None): See also :func:`direct_spin1.absorb_h1e` ''' fcivec = numpy.asarray(fcivec, order='C') - eri = ao2mo.restore(4, eri, norb) + eri = numpy.asarray(ao2mo.restore(4, eri, norb), order='C') link_indexa, link_indexb = _unpack(norb, nelec, link_index) na, nlinka = link_indexa.shape[:2] nb, nlinkb = link_indexb.shape[:2] - assert (fcivec.size == na*nb) + assert fcivec.size == na*nb + assert fcivec.dtype == eri.dtype == numpy.float64 ci1 = numpy.empty_like(fcivec) libfci.FCIcontract_2e_spin1(eri.ctypes.data_as(ctypes.c_void_p), @@ -146,12 +148,12 @@ 
def make_hdiag(h1e, eri, norb, nelec, compress=False): Kwargs: compress (bool) : whether to remove symmetry forbidden elements ''' - if h1e.dtype == numpy.complex128 or eri.dtype == numpy.complex128: + if not (h1e.dtype == eri.dtype == numpy.float64): raise NotImplementedError('Complex Hamiltonian') neleca, nelecb = _unpack_nelec(nelec) h1e = numpy.asarray(h1e, order='C') - eri = ao2mo.restore(1, eri, norb) + eri = numpy.asarray(ao2mo.restore(1, eri, norb), order='C') occslsta = occslstb = cistring.gen_occslst(range(norb), neleca) if neleca != nelecb: occslstb = cistring.gen_occslst(range(norb), nelecb) diff --git a/pyscf/mcscf/__init__.py b/pyscf/mcscf/__init__.py index 8d8b105ee6..82196ded33 100644 --- a/pyscf/mcscf/__init__.py +++ b/pyscf/mcscf/__init__.py @@ -81,42 +81,28 @@ Converge threshold. Default is 1e-7 conv_tol_grad : float Converge threshold for CI gradients and orbital rotation gradients. - Default is 1e-4 + If not specified, it is set to sqrt(conv_tol). max_stepsize : float The step size for orbital rotation. Small step size is prefered. - Default is 0.03. + Default is 0.02. (NOTE although the default step size is small enough for many systems, it happens that the orbital optimizor crosses the barriar of local minimum and converge to the neighbour solution, e.g. the CAS(4,4) for C2H4 in the test files. In these systems, adjusting max_stepsize, - max_ci_stepsize and max_cycle_micro, max_cycle_micro_inner and - ah_start_tol may be helpful) + max_ci_stepsize and max_cycle_micro and ah_start_tol may be helpful) >>> mc = mcscf.CASSCF(mf, 6, 6) >>> mc.max_stepsize = .01 >>> mc.max_cycle_micro = 1 >>> mc.max_cycle_macro = 100 - >>> mc.max_cycle_micro_inner = 1 >>> mc.ah_start_tol = 1e-6 - max_ci_stepsize : float - The max size for approximate CI updates. The approximate updates are - used in 1-step algorithm, to estimate the change of CI wavefunction wrt - the orbital rotation. Small step size is prefered. Default is 0.01. 
max_cycle_macro : int Max number of macro iterations. Default is 50. max_cycle_micro : int Max number of micro iterations in each macro iteration. Depending on systems, increasing this value might reduce the total macro - iterations. Generally, 2 - 3 steps should be enough. Default is 2. - max_cycle_micro_inner : int - Max number of steps for the orbital rotations allowed for the augmented - hessian solver. It can affect the actual size of orbital rotation. - Even with a small max_stepsize, a few max_cycle_micro_inner can - accumulate the rotation and leads to a significant change of the CAS - space. Depending on systems, increasing this value migh reduce the - total number of macro iterations. The value between 2 - 8 is preferred. - Default is 4. + iterations. Generally, 2 - 5 steps should be enough. Default is 4. frozen : int or list If integer is given, the inner-most orbitals are excluded from optimization. Given the orbital indices (0-based) in a list, any doubly occupied core @@ -131,7 +117,7 @@ Linear dependence threshold for AH solver. Default is 1e-16. ah_start_tol : flat, for AH solver. In AH solver, the orbital rotation is started without completely solving the AH problem. - This value is to control the start point. Default is 1e-4. + This value is to control the start point. Default is 2.5. ah_start_cycle : int, for AH solver. In AH solver, the orbital rotation is started without completely solving the AH problem. This value is to control the start point. Default is 3. diff --git a/pyscf/mcscf/mc1step.py b/pyscf/mcscf/mc1step.py index 020d6e6d88..0744ebb372 100644 --- a/pyscf/mcscf/mc1step.py +++ b/pyscf/mcscf/mc1step.py @@ -595,16 +595,16 @@ class CASSCF(casci.CASBase): Converge threshold. Default is 1e-7 conv_tol_grad : float Converge threshold for CI gradients and orbital rotation gradients. - Default is 1e-4 + If not specified, it is set to sqrt(conv_tol). max_stepsize : float The step size for orbital rotation. 
Small step (0.005 - 0.05) is prefered. - Default is 0.03. + Default is 0.02. max_cycle_macro : int Max number of macro iterations. Default is 50. max_cycle_micro : int Max number of micro iterations in each macro iteration. Depending on systems, increasing this value might reduce the total macro - iterations. Generally, 2 - 5 steps should be enough. Default is 3. + iterations. Generally, 2 - 5 steps should be enough. Default is 4. small_rot_tol : float Threshold for orbital rotation to be considered small. If the largest orbital rotation is smaller than this value, the CI solver will restart from the @@ -620,10 +620,10 @@ class CASSCF(casci.CASBase): Linear dependence threshold for AH solver. Default is 1e-14. ah_start_tol : flat, for AH solver. In AH solver, the orbital rotation is started without completely solving the AH problem. - This value is to control the start point. Default is 0.2. + This value is to control the start point. Default is 2.5. ah_start_cycle : int, for AH solver. In AH solver, the orbital rotation is started without completely solving the AH problem. - This value is to control the start point. Default is 2. + This value is to control the start point. Default is 3. ``ah_conv_tol``, ``ah_max_cycle``, ``ah_lindep``, ``ah_start_tol`` and ``ah_start_cycle`` can affect the accuracy and performance of CASSCF solver. 
Lower From 4150f854ff293c89ce3bb1907c9d0710a960e66d Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Mon, 11 Mar 2024 22:24:56 -0700 Subject: [PATCH 22/44] Improve to_gpu (#2124) * Update to_gpu * Improve to_ks/to_hf * Update to_gpu for df, solvent, qmm, and other methods * Fix fci initialization --- pyscf/cc/ccsd.py | 2 + pyscf/cc/dfccsd.py | 2 + pyscf/cc/gccsd.py | 2 + pyscf/cc/uccsd.py | 2 + pyscf/ci/cisd.py | 2 + pyscf/df/addons.py | 2 +- pyscf/df/df.py | 4 +- pyscf/df/df_jk.py | 3 +- pyscf/df/grad/casscf.py | 2 + pyscf/df/grad/rhf.py | 8 ++-- pyscf/df/grad/rks.py | 4 +- pyscf/df/grad/sacasscf.py | 2 + pyscf/df/grad/uhf.py | 2 + pyscf/df/grad/uks.py | 2 + pyscf/df/hessian/rhf.py | 5 +-- pyscf/df/hessian/rks.py | 5 +-- pyscf/df/hessian/uhf.py | 1 + pyscf/df/hessian/uks.py | 1 + pyscf/dft/dks.py | 2 + pyscf/dft/gen_grid.py | 4 +- pyscf/dft/gks.py | 3 +- pyscf/dft/gks_symm.py | 3 ++ pyscf/dft/numint.py | 5 +-- pyscf/dft/rks.py | 7 +-- pyscf/dft/rks_symm.py | 7 ++- pyscf/dft/roks.py | 9 +--- pyscf/dft/uks.py | 8 +--- pyscf/dft/uks_symm.py | 3 ++ pyscf/fci/direct_spin1.py | 2 + pyscf/grad/casci.py | 2 + pyscf/grad/casscf.py | 2 + pyscf/grad/ccsd.py | 2 + pyscf/grad/cisd.py | 2 + pyscf/grad/dhf.py | 2 + pyscf/grad/mp2.py | 2 + pyscf/grad/rhf.py | 4 +- pyscf/grad/rks.py | 4 +- pyscf/grad/tdrhf.py | 2 + pyscf/grad/uhf.py | 3 +- pyscf/grad/uks.py | 3 +- pyscf/hessian/rhf.py | 7 +-- pyscf/hessian/rks.py | 5 +-- pyscf/hessian/uhf.py | 3 +- pyscf/hessian/uks.py | 4 +- pyscf/lib/diis.py | 2 + pyscf/lib/misc.py | 74 +++++++++++++++++++++++++++---- pyscf/mcscf/casci.py | 4 +- pyscf/mcscf/casci_symm.py | 4 +- pyscf/mcscf/mc1step.py | 4 +- pyscf/mcscf/newton_casscf_symm.py | 2 +- pyscf/mcscf/ucasci.py | 2 +- pyscf/mcscf/umc1step.py | 2 +- pyscf/mp/dfmp2.py | 2 + pyscf/mp/dfmp2_native.py | 2 + pyscf/mp/dfump2_native.py | 2 + pyscf/mp/gmp2.py | 2 + pyscf/mp/mp2.py | 2 + pyscf/mp/ump2.py | 2 + pyscf/pbc/cc/kccsd.py | 2 + pyscf/pbc/cc/kccsd_rhf.py | 2 + pyscf/pbc/cc/kccsd_uhf.py | 2 
+ pyscf/pbc/df/df.py | 2 + pyscf/pbc/df/fft.py | 2 + pyscf/pbc/dft/gen_grid.py | 4 ++ pyscf/pbc/dft/gks.py | 2 + pyscf/pbc/dft/kgks.py | 2 + pyscf/pbc/dft/krks.py | 2 + pyscf/pbc/dft/kroks.py | 2 + pyscf/pbc/dft/kuks.py | 2 + pyscf/pbc/dft/numint.py | 4 ++ pyscf/pbc/dft/rks.py | 2 + pyscf/pbc/dft/roks.py | 2 + pyscf/pbc/dft/uks.py | 2 + pyscf/pbc/mp/kmp2.py | 2 + pyscf/pbc/scf/ghf.py | 2 + pyscf/pbc/scf/hf.py | 1 + pyscf/pbc/scf/kghf.py | 2 + pyscf/pbc/scf/khf.py | 1 + pyscf/pbc/scf/krohf.py | 1 + pyscf/pbc/scf/kuhf.py | 1 + pyscf/pbc/scf/rohf.py | 1 + pyscf/pbc/scf/rsjk.py | 2 + pyscf/pbc/scf/uhf.py | 1 + pyscf/qmmm/itrf.py | 12 +++++ pyscf/scf/dhf.py | 8 ++-- pyscf/scf/diis.py | 4 -- pyscf/scf/ghf.py | 10 ++--- pyscf/scf/ghf_symm.py | 3 +- pyscf/scf/hf.py | 15 ++++--- pyscf/scf/hf_symm.py | 6 +-- pyscf/scf/rohf.py | 4 +- pyscf/scf/uhf.py | 7 ++- pyscf/scf/uhf_symm.py | 3 +- pyscf/sgx/sgx.py | 5 +++ pyscf/solvent/_attach_solvent.py | 27 ++++++++++- pyscf/solvent/ddcosmo.py | 2 + pyscf/soscf/newton_ah.py | 3 ++ pyscf/tdscf/rhf.py | 7 +++ pyscf/tdscf/uhf.py | 4 ++ pyscf/x2c/sfx2c1e.py | 4 ++ pyscf/x2c/x2c.py | 8 ++++ 101 files changed, 307 insertions(+), 130 deletions(-) diff --git a/pyscf/cc/ccsd.py b/pyscf/cc/ccsd.py index 460237208a..65ee49486f 100644 --- a/pyscf/cc/ccsd.py +++ b/pyscf/cc/ccsd.py @@ -1365,6 +1365,8 @@ def get_d2_diagnostic(self, t2=None): if t2 is None: t2 = self.t2 return get_d2_diagnostic(t2) + to_gpu = lib.to_gpu + CC = RCCSD = CCSD diff --git a/pyscf/cc/dfccsd.py b/pyscf/cc/dfccsd.py index f144734067..e95d4fae6e 100644 --- a/pyscf/cc/dfccsd.py +++ b/pyscf/cc/dfccsd.py @@ -48,6 +48,8 @@ def _add_vvvv(self, t1, t2, eris, out=None, with_ovvv=False, t2sym=None): assert (not self.direct) return ccsd.CCSD._add_vvvv(self, t1, t2, eris, out, with_ovvv, t2sym) + to_gpu = lib.to_gpu + def _contract_vvvv_t2(mycc, mol, vvL, t2, out=None, verbose=None): '''Ht2 = numpy.einsum('ijcd,acdb->ijab', t2, vvvv) diff --git a/pyscf/cc/gccsd.py b/pyscf/cc/gccsd.py 
index 2db369cfd5..dda254966d 100644 --- a/pyscf/cc/gccsd.py +++ b/pyscf/cc/gccsd.py @@ -289,6 +289,8 @@ def spin2spatial(self, tx, orbspin=None): orbspin = orbspin[self.get_frozen_mask()] return spin2spatial(tx, orbspin) + to_gpu = lib.to_gpu + CCSD = GCCSD diff --git a/pyscf/cc/uccsd.py b/pyscf/cc/uccsd.py index a6cbe05519..5bd7b50a32 100644 --- a/pyscf/cc/uccsd.py +++ b/pyscf/cc/uccsd.py @@ -758,6 +758,8 @@ def vector_size(self, nmo=None, nocc=None): def amplitudes_from_rccsd(self, t1, t2): return amplitudes_from_rccsd(t1, t2) + to_gpu = lib.to_gpu + CCSD = UCCSD diff --git a/pyscf/ci/cisd.py b/pyscf/ci/cisd.py index cbbab7859f..629b00da6d 100644 --- a/pyscf/ci/cisd.py +++ b/pyscf/ci/cisd.py @@ -1131,6 +1131,8 @@ def nuc_grad_method(self): from pyscf.grad import cisd return cisd.Gradients(self) + to_gpu = lib.to_gpu + class RCISD(CISD): pass diff --git a/pyscf/df/addons.py b/pyscf/df/addons.py index 5cd32e7eae..684204b508 100644 --- a/pyscf/df/addons.py +++ b/pyscf/df/addons.py @@ -152,7 +152,7 @@ def make_auxbasis(mol, mp2fit=False): _basis.update(mol.basis) del (_basis['default']) else: - _basis = mol._basis + _basis = mol._basis or {} auxbasis = {} for k in _basis: diff --git a/pyscf/df/df.py b/pyscf/df/df.py index 403dd3c37e..82b0d83daa 100644 --- a/pyscf/df/df.py +++ b/pyscf/df/df.py @@ -308,9 +308,7 @@ def range_coulomb(self, omega): if auxmol_omega is not None: auxmol.omega = auxmol_omega - def to_gpu(self): - from gpu4pyscf.df.df import DF as DF - return lib.to_gpu(self.__class__.reset(self.view(DF))) + to_gpu = lib.to_gpu GDF = DF diff --git a/pyscf/df/df_jk.py b/pyscf/df/df_jk.py index 8d21db6aa2..2c02564efd 100644 --- a/pyscf/df/df_jk.py +++ b/pyscf/df/df_jk.py @@ -228,8 +228,7 @@ def CASSCF(self, ncas, nelecas, auxbasis=None, ncore=None, frozen=None): def to_gpu(self): obj = self.undo_df().to_gpu().density_fit() - obj.__dict__.update(self.__dict__) - return lib.to_gpu(obj) + return lib.to_gpu(self, obj) def get_jk(dfobj, dm, hermi=1, with_j=True, 
with_k=True, direct_scf_tol=1e-13): diff --git a/pyscf/df/grad/casscf.py b/pyscf/df/grad/casscf.py index 56815f96b7..243a100f83 100644 --- a/pyscf/df/grad/casscf.py +++ b/pyscf/df/grad/casscf.py @@ -224,6 +224,8 @@ def _finalize(self): as_scanner = as_scanner + to_gpu = lib.to_gpu + Grad = Gradients #from pyscf import mcscf diff --git a/pyscf/df/grad/rhf.py b/pyscf/df/grad/rhf.py index 0c945d8d21..91ccd3e543 100644 --- a/pyscf/df/grad/rhf.py +++ b/pyscf/df/grad/rhf.py @@ -482,12 +482,14 @@ class Gradients(rhf_grad.Gradients): _keys = {'with_df', 'auxbasis_response'} def __init__(self, mf): - assert isinstance(mf, df.df_jk._DFHF) # Whether to include the response of DF auxiliary basis when computing # nuclear gradients of J/K matrices self.auxbasis_response = True rhf_grad.Gradients.__init__(self, mf) + def check_sanity(self): + assert isinstance(self.base, df.df_jk._DFHF) + def get_jk(self, mol=None, dm=None, hermi=0, with_j=True, with_k=True, omega=None): if omega is None: @@ -521,8 +523,6 @@ def extra_force(self, atom_id, envs): else: return 0 - def to_gpu(self): - from gpu4pyscf.df.grad.rhf import Gradients - return lib.to_gpu(self.view(Gradients)) + to_gpu = lib.to_gpu Grad = Gradients diff --git a/pyscf/df/grad/rks.py b/pyscf/df/grad/rks.py index aaa8f663ea..a53d3d0ae6 100644 --- a/pyscf/df/grad/rks.py +++ b/pyscf/df/grad/rks.py @@ -123,8 +123,6 @@ def extra_force(self, atom_id, envs): e1 += envs['vhf'].aux[atom_id] return e1 - def to_gpu(self): - from gpu4pyscf.df.grad.rks import Gradients - return lib.to_gpu(self.view(Gradients)) + to_gpu = lib.to_gpu Grad = Gradients diff --git a/pyscf/df/grad/sacasscf.py b/pyscf/df/grad/sacasscf.py index 1d985d993c..713f5f8e35 100644 --- a/pyscf/df/grad/sacasscf.py +++ b/pyscf/df/grad/sacasscf.py @@ -370,3 +370,5 @@ def kernel (self, **kwargs): def get_LdotJnuc (self, Lvec, **kwargs): with lib.temporary_env (sacasscf_grad, Lci_dot_dgci_dx=Lci_dot_dgci_dx, Lorb_dot_dgorb_dx=Lorb_dot_dgorb_dx): return 
sacasscf_grad.Gradients.get_LdotJnuc (self, Lvec, **kwargs) + + to_gpu = lib.to_gpu diff --git a/pyscf/df/grad/uhf.py b/pyscf/df/grad/uhf.py index 0eec773b0d..af2e048591 100644 --- a/pyscf/df/grad/uhf.py +++ b/pyscf/df/grad/uhf.py @@ -60,4 +60,6 @@ def extra_force(self, atom_id, envs): else: return 0 + to_gpu = lib.to_gpu + Grad = Gradients diff --git a/pyscf/df/grad/uks.py b/pyscf/df/grad/uks.py index e6de663a95..9fa6f5cdf5 100644 --- a/pyscf/df/grad/uks.py +++ b/pyscf/df/grad/uks.py @@ -124,4 +124,6 @@ def extra_force(self, atom_id, envs): e1 += envs['vhf'].aux[atom_id] return e1 + to_gpu = lib.to_gpu + Grad = Gradients diff --git a/pyscf/df/hessian/rhf.py b/pyscf/df/hessian/rhf.py index 5c9f7a17ac..95bc7f9dcf 100644 --- a/pyscf/df/hessian/rhf.py +++ b/pyscf/df/hessian/rhf.py @@ -480,10 +480,7 @@ def __init__(self, mf): partial_hess_elec = partial_hess_elec make_h1 = make_h1 - - def to_gpu(self): - from gpu4pyscf.df.hessian.rhf import Hessian - return lib.to_gpu(self.view(Hessian)) + to_gpu = lib.to_gpu #TODO: Insert into DF class diff --git a/pyscf/df/hessian/rks.py b/pyscf/df/hessian/rks.py index 79816d8cee..30b59fc8d1 100644 --- a/pyscf/df/hessian/rks.py +++ b/pyscf/df/hessian/rks.py @@ -126,10 +126,7 @@ def __init__(self, mf): partial_hess_elec = partial_hess_elec make_h1 = make_h1 - - def to_gpu(self): - from gpu4pyscf.df.hessian.rks import Hessian - return lib.to_gpu(self.view(Hessian)) + to_gpu = lib.to_gpu if __name__ == '__main__': diff --git a/pyscf/df/hessian/uhf.py b/pyscf/df/hessian/uhf.py index 5cb20240f8..b252f99953 100644 --- a/pyscf/df/hessian/uhf.py +++ b/pyscf/df/hessian/uhf.py @@ -531,6 +531,7 @@ def __init__(self, mf): partial_hess_elec = partial_hess_elec make_h1 = make_h1 + to_gpu = lib.to_gpu #TODO: Insert into DF class diff --git a/pyscf/df/hessian/uks.py b/pyscf/df/hessian/uks.py index 1afa995973..92624a128f 100644 --- a/pyscf/df/hessian/uks.py +++ b/pyscf/df/hessian/uks.py @@ -139,6 +139,7 @@ def __init__(self, mf): partial_hess_elec = 
partial_hess_elec make_h1 = make_h1 + to_gpu = lib.to_gpu if __name__ == '__main__': diff --git a/pyscf/dft/dks.py b/pyscf/dft/dks.py index 783c3c546b..d1f35803b9 100644 --- a/pyscf/dft/dks.py +++ b/pyscf/dft/dks.py @@ -143,6 +143,8 @@ def x2c1e(self): return x2chf x2c = x2c1e + to_gpu = lib.to_gpu + UKS = UDKS = DKS class RDKS(DKS, dhf.RDHF): diff --git a/pyscf/dft/gen_grid.py b/pyscf/dft/gen_grid.py index 77c1c781fd..b97ae96cde 100644 --- a/pyscf/dft/gen_grid.py +++ b/pyscf/dft/gen_grid.py @@ -587,9 +587,7 @@ def prune_by_density_(self, rho, threshold=0): self.screen_index = self.non0tab return self - def to_gpu(self): - from gpu4pyscf.dft.gen_grid import Grids - return lib.to_gpu(self.view(Grids)) + to_gpu = lib.to_gpu def _default_rad(nuc, level=3): diff --git a/pyscf/dft/gks.py b/pyscf/dft/gks.py index edfd29e25b..26c6b902e9 100644 --- a/pyscf/dft/gks.py +++ b/pyscf/dft/gks.py @@ -177,8 +177,7 @@ def to_hf(self): '''Convert to GHF object.''' return self._transfer_attrs_(self.mol.GHF()) - def to_gpu(self): - raise NotImplementedError + to_gpu = lib.to_gpu if __name__ == '__main__': diff --git a/pyscf/dft/gks_symm.py b/pyscf/dft/gks_symm.py index 60ff1c2309..1c599a247f 100644 --- a/pyscf/dft/gks_symm.py +++ b/pyscf/dft/gks_symm.py @@ -20,6 +20,7 @@ Generalized Kohn-Sham ''' +from pyscf import lib from pyscf.lib import logger from pyscf.scf import ghf_symm from pyscf.dft import gks @@ -57,6 +58,8 @@ def collinear(self, val): def nuc_grad_method(self): raise NotImplementedError + to_gpu = lib.to_gpu + if __name__ == '__main__': import numpy diff --git a/pyscf/dft/numint.py b/pyscf/dft/numint.py index 0b3cfdc74c..1716042118 100644 --- a/pyscf/dft/numint.py +++ b/pyscf/dft/numint.py @@ -2865,10 +2865,7 @@ def make_rho(idm, ao, sindex, xctype): with_lapl) return make_rho, ndms, nao - def to_gpu(self): - from gpu4pyscf.dft.numint import NumInt - # Note: gpu4pyscf NumInt initializes additional things in __init__.py - return NumInt() + to_gpu = lib.to_gpu _NumInt = 
NumInt diff --git a/pyscf/dft/rks.py b/pyscf/dft/rks.py index 89f7e5d6d0..2a5d82c2d1 100644 --- a/pyscf/dft/rks.py +++ b/pyscf/dft/rks.py @@ -531,9 +531,4 @@ def to_hf(self): '''Convert to RHF object.''' return self._transfer_attrs_(self.mol.RHF()) - def to_gpu(self): - from gpu4pyscf.dft.rks import RKS - obj = lib.to_gpu(hf.SCF.reset(self.view(RKS))) - # Attributes only defined in gpu4pyscf.RKS - obj.screen_tol = 1e-14 - return obj + to_gpu = lib.to_gpu diff --git a/pyscf/dft/rks_symm.py b/pyscf/dft/rks_symm.py index f47071f37e..9956532e43 100644 --- a/pyscf/dft/rks_symm.py +++ b/pyscf/dft/rks_symm.py @@ -20,6 +20,7 @@ Non-relativistic Restricted Kohn-Sham ''' +from pyscf import lib from pyscf.scf import hf_symm from pyscf.dft import rks from pyscf.dft import uks @@ -46,12 +47,14 @@ def nuc_grad_method(self): from pyscf.grad import rks return rks.Gradients(self) + to_gpu = lib.to_gpu + RKS = SymAdaptedRKS class SymAdaptedROKS(rks.KohnShamDFT, hf_symm.SymAdaptedROHF): ''' Restricted Kohn-Sham ''' - def __init__(self, mol, xc='LDA,VWN'): + def __init__(self, mol=None, xc='LDA,VWN'): hf_symm.ROHF.__init__(self, mol) rks.KohnShamDFT.__init__(self, xc) @@ -70,6 +73,8 @@ def nuc_grad_method(self): from pyscf.grad import roks return roks.Gradients(self) + to_gpu = lib.to_gpu + ROKS = SymAdaptedROKS diff --git a/pyscf/dft/roks.py b/pyscf/dft/roks.py index 3f44d5227e..1247757207 100644 --- a/pyscf/dft/roks.py +++ b/pyscf/dft/roks.py @@ -65,14 +65,7 @@ def to_hf(self): '''Convert to ROHF object.''' return self._transfer_attrs_(self.mol.ROHF()) - def to_gpu(self): - from pyscf.scf.hf import SCF - from gpu4pyscf.dft.roks import ROKS - obj = lib.to_gpu(SCF.reset(self.view(ROKS))) - # Attributes only defined in gpu4pyscf.RKS - obj.screen_tol = 1e-14 - obj.disp = None - return obj + to_gpu = lib.to_gpu if __name__ == '__main__': diff --git a/pyscf/dft/uks.py b/pyscf/dft/uks.py index cf2f7d7486..9eee8202a7 100644 --- a/pyscf/dft/uks.py +++ b/pyscf/dft/uks.py @@ -197,10 +197,4 @@ 
def to_hf(self): '''Convert to UHF object.''' return self._transfer_attrs_(self.mol.UHF()) - def to_gpu(self): - from pyscf.scf.hf import SCF - from gpu4pyscf.dft.uks import UKS - obj = lib.to_gpu(SCF.reset(self.view(UKS))) - # Attributes only defined in gpu4pyscf.RKS - obj.screen_tol = 1e-14 - return obj + to_gpu = lib.to_gpu diff --git a/pyscf/dft/uks_symm.py b/pyscf/dft/uks_symm.py index c6f9597077..7ffa216b10 100644 --- a/pyscf/dft/uks_symm.py +++ b/pyscf/dft/uks_symm.py @@ -20,6 +20,7 @@ Non-relativistic Unrestricted Kohn-Sham ''' +from pyscf import lib from pyscf.lib import logger from pyscf.scf import uhf_symm from pyscf.dft import uks @@ -47,6 +48,8 @@ def nuc_grad_method(self): from pyscf.grad import uks return uks.Gradients(self) + to_gpu = lib.to_gpu + UKS = SymAdaptedUKS diff --git a/pyscf/fci/direct_spin1.py b/pyscf/fci/direct_spin1.py index 5c63cfdf59..7f8d04f3d3 100644 --- a/pyscf/fci/direct_spin1.py +++ b/pyscf/fci/direct_spin1.py @@ -945,6 +945,8 @@ def transform_ci_for_orbital_rotation(self, fcivec, norb, nelec, u): nelec = _unpack_nelec(nelec, self.spin) return addons.transform_ci_for_orbital_rotation(fcivec, norb, nelec, u) + to_gpu = lib.to_gpu + FCI = FCISolver class FCIvector(numpy.ndarray): diff --git a/pyscf/grad/casci.py b/pyscf/grad/casci.py index ccc29af896..d88adc891a 100644 --- a/pyscf/grad/casci.py +++ b/pyscf/grad/casci.py @@ -342,6 +342,8 @@ def _finalize(self): as_scanner = as_scanner + to_gpu = lib.to_gpu + Grad = Gradients from pyscf import mcscf diff --git a/pyscf/grad/casscf.py b/pyscf/grad/casscf.py index 0ce78a82b3..466a0bd007 100644 --- a/pyscf/grad/casscf.py +++ b/pyscf/grad/casscf.py @@ -220,6 +220,8 @@ def _finalize(self): as_scanner = as_scanner + to_gpu = lib.to_gpu + Grad = Gradients from pyscf import mcscf diff --git a/pyscf/grad/ccsd.py b/pyscf/grad/ccsd.py index 6ab4a5373b..18b67f1e36 100644 --- a/pyscf/grad/ccsd.py +++ b/pyscf/grad/ccsd.py @@ -456,6 +456,8 @@ def grad_nuc(self, mol=None, atmlst=None): as_scanner = 
as_scanner + to_gpu = lib.to_gpu + Grad = Gradients ccsd.CCSD.Gradients = lib.class_as_method(Gradients) diff --git a/pyscf/grad/cisd.py b/pyscf/grad/cisd.py index 1a57c86e8f..da8099de23 100644 --- a/pyscf/grad/cisd.py +++ b/pyscf/grad/cisd.py @@ -203,6 +203,8 @@ def _finalize(self): as_scanner = as_scanner + to_gpu = lib.to_gpu + Grad = Gradients cisd.CISD.Gradients = lib.class_as_method(Gradients) diff --git a/pyscf/grad/dhf.py b/pyscf/grad/dhf.py index 88913bf56b..aefb66dfee 100644 --- a/pyscf/grad/dhf.py +++ b/pyscf/grad/dhf.py @@ -217,6 +217,8 @@ def kernel(self, mo_energy=None, mo_coeff=None, mo_occ=None, atmlst=None): as_scanner = rhf_grad.as_scanner + to_gpu = lib.to_gpu + Grad = Gradients from pyscf import scf diff --git a/pyscf/grad/mp2.py b/pyscf/grad/mp2.py index 65ff7c8834..5a767a20c2 100644 --- a/pyscf/grad/mp2.py +++ b/pyscf/grad/mp2.py @@ -309,6 +309,8 @@ def grad_nuc(self, mol=None, atmlst=None): as_scanner = as_scanner + to_gpu = lib.to_gpu + Grad = Gradients # Inject to RMP2 class diff --git a/pyscf/grad/rhf.py b/pyscf/grad/rhf.py index e5eaf7f7f8..e45e3b5ed9 100644 --- a/pyscf/grad/rhf.py +++ b/pyscf/grad/rhf.py @@ -463,9 +463,7 @@ def make_rdm1e(self, mo_energy=None, mo_coeff=None, mo_occ=None): grad_elec = grad_elec - def to_gpu(self): - from gpu4pyscf.grad.rhf import Gradients - return lib.to_gpu(self.view(Gradients)) + to_gpu = lib.to_gpu Grad = Gradients diff --git a/pyscf/grad/rks.py b/pyscf/grad/rks.py index b3c9c31ded..bb0198140d 100644 --- a/pyscf/grad/rks.py +++ b/pyscf/grad/rks.py @@ -622,9 +622,7 @@ def extra_force(self, atom_id, envs): else: return 0 - def to_gpu(self): - from gpu4pyscf.grad.rks import Gradients - return lib.to_gpu(self.view(Gradients)) + to_gpu = lib.to_gpu Grad = Gradients diff --git a/pyscf/grad/tdrhf.py b/pyscf/grad/tdrhf.py index bfb08aedba..2d4c5bbb4a 100644 --- a/pyscf/grad/tdrhf.py +++ b/pyscf/grad/tdrhf.py @@ -325,6 +325,8 @@ def _finalize(self): as_scanner = as_scanner + to_gpu = lib.to_gpu + Grad = 
Gradients from pyscf import tdscf diff --git a/pyscf/grad/uhf.py b/pyscf/grad/uhf.py index c56878053b..949b7abf44 100644 --- a/pyscf/grad/uhf.py +++ b/pyscf/grad/uhf.py @@ -106,8 +106,7 @@ def make_rdm1e(self, mo_energy=None, mo_coeff=None, mo_occ=None): grad_elec = grad_elec - def to_gpu(self): - raise NotImplementedError + to_gpu = lib.to_gpu Grad = Gradients diff --git a/pyscf/grad/uks.py b/pyscf/grad/uks.py index 2f59f9dcc6..cc73955814 100644 --- a/pyscf/grad/uks.py +++ b/pyscf/grad/uks.py @@ -275,8 +275,7 @@ def extra_force(self, atom_id, envs): else: return 0 - def to_gpu(self): - raise NotImplementedError + to_gpu = lib.to_gpu Grad = Gradients diff --git a/pyscf/hessian/rhf.py b/pyscf/hessian/rhf.py index a11fc9d7ff..9736eeff5c 100644 --- a/pyscf/hessian/rhf.py +++ b/pyscf/hessian/rhf.py @@ -487,9 +487,9 @@ def __init__(self, scf_method): self.verbose = scf_method.verbose self.stdout = scf_method.stdout self.mol = scf_method.mol - self.base = scf_method self.chkfile = scf_method.chkfile self.max_memory = self.mol.max_memory + self.base = scf_method self.atmlst = range(self.mol.natm) self.de = numpy.zeros((0,0,3,3)) # (A,B,dR_A,dR_B) @@ -608,10 +608,7 @@ class Hessian(HessianBase): partial_hess_elec = partial_hess_elec hess_elec = hess_elec make_h1 = make_h1 - - def to_gpu(self): - from gpu4pyscf.hessian.rhf import Hessian - return lib.to_gpu(self.view(Hessian)) + to_gpu = lib.to_gpu # Inject to RHF class from pyscf import scf diff --git a/pyscf/hessian/rks.py b/pyscf/hessian/rks.py index 41bcb63bd2..31ee13115c 100644 --- a/pyscf/hessian/rks.py +++ b/pyscf/hessian/rks.py @@ -590,10 +590,7 @@ def __init__(self, mf): partial_hess_elec = partial_hess_elec hess_elec = rhf_hess.hess_elec make_h1 = make_h1 - - def to_gpu(self): - from gpu4pyscf.hessian.rks import Hessian - return lib.to_gpu(self.view(Hessian)) + to_gpu = lib.to_gpu from pyscf import dft dft.rks.RKS.Hessian = dft.rks_symm.RKS.Hessian = lib.class_as_method(Hessian) diff --git a/pyscf/hessian/uhf.py 
b/pyscf/hessian/uhf.py index 1b30e264ee..4b97fbf6f9 100644 --- a/pyscf/hessian/uhf.py +++ b/pyscf/hessian/uhf.py @@ -454,8 +454,7 @@ def solve_mo1(self, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, fx, atmlst, max_memory, verbose, max_cycle=self.max_cycle, level_shift=self.level_shift) - def to_gpu(self): - raise NotImplementedError + to_gpu = lib.to_gpu from pyscf import scf scf.uhf.UHF.Hessian = lib.class_as_method(Hessian) diff --git a/pyscf/hessian/uks.py b/pyscf/hessian/uks.py index a911f661b4..2c3941452b 100644 --- a/pyscf/hessian/uks.py +++ b/pyscf/hessian/uks.py @@ -667,9 +667,7 @@ def __init__(self, mf): solve_mo1 = uhf_hess.Hessian.solve_mo1 partial_hess_elec = partial_hess_elec make_h1 = make_h1 - - def to_gpu(self): - raise NotImplementedError + to_gpu = lib.to_gpu from pyscf import dft dft.uks.UKS.Hessian = dft.uks_symm.UKS.Hessian = lib.class_as_method(Hessian) diff --git a/pyscf/lib/diis.py b/pyscf/lib/diis.py index f6e1cbd9d9..db6b06d8db 100644 --- a/pyscf/lib/diis.py +++ b/pyscf/lib/diis.py @@ -334,6 +334,8 @@ def restore(self, filename, inplace=True): self._H[1:nd+1,1:nd+1] = e_mat return self + to_gpu = misc.to_gpu + def restore(filename): '''Restore/construct diis object based on a diis file''' diff --git a/pyscf/lib/misc.py b/pyscf/lib/misc.py index 46ebf5edf0..9819f2a978 100644 --- a/pyscf/lib/misc.py +++ b/pyscf/lib/misc.py @@ -1360,17 +1360,73 @@ def isintsequence(obj): are_ints = are_ints and isinteger(i) return are_ints -def to_gpu(method): - '''Recursively converts all attributes of a method to cupy objects or - gpu4pyscf objects. +class _OmniObject: + '''Class with default attributes. When accessing an attribute that is not + initialized, a default value will be returned than raising an AttributeError. 
+ ''' + verbose = 0 + max_memory = param.MAX_MEMORY + stdout = sys.stdout + + def __init__(self, default_factory=None): + self._default = default_factory + + def __getattr__(self, key): + return self._default + +# Many methods requires a mol or mf object in initialization. +# These objects can be as the default arguments for these methods. +# Then class can be instantiated easily like cls(omniobj) in the following +# to_gpu function. +omniobj = _OmniObject() +omniobj.mol = omniobj +omniobj._scf = omniobj +omniobj.base = omniobj + +def to_gpu(method, out=None): + '''Convert a method to its corresponding GPU variant, and recursively + converts all attributes of a method to cupy objects or gpu4pyscf objects. ''' import cupy from pyscf import gto - for key, val in method.__dict__.items(): - if isinstance(val, gto.MoleBase): - continue + + # If a GPU class inherits a CPU code, the "to_gpu" method may be resolved + # and available in the GPU class. Skip the conversion in this case. + if method.__module__.startswith('gpu4pyscf'): + return method + + if out is None: + try: + import gpu4pyscf + except ImportError: + print('Library gpu4pyscf not found. You can install this package via\n' + ' pip install gpu4pyscf-cuda11x\n' + 'See more installation info at https://github.com/pyscf/gpu4pyscf') + raise + + # TODO: Is it necessary to implement scanner in gpu4pyscf? + if isinstance(method, (SinglePointScanner, GradScanner)): + method = method.undo_scanner() + + from importlib import import_module + mod = import_module(method.__module__.replace('pyscf', 'gpu4pyscf')) + cls = getattr(mod, method.__class__.__name__) + # A temporary GPU instance. This ensures to initialize private + # attributes that are only available for GPU code. 
+ out = cls(omniobj) + + # Convert only the keys that are defined in the corresponding GPU class + cls_keys = [getattr(cls, '_keys', ()) for cls in out.__class__.__mro__[:-1]] + out_keys = set(out.__dict__).union(*cls_keys) + # Only overwrite the attributes of the same name. + keys = set(method.__dict__).intersection(out_keys) + + for key in keys: + val = getattr(method, key) if isinstance(val, numpy.ndarray): - setattr(method, key, cupy.asarray(val)) + val = cupy.asarray(val) elif hasattr(val, 'to_gpu'): - setattr(method, key, val.to_gpu()) - return method + val = val.to_gpu() + setattr(out, key, val) + out.reset() + return out diff --git a/pyscf/mcscf/casci.py b/pyscf/mcscf/casci.py index 3f00365b48..2d96bfbc72 100644 --- a/pyscf/mcscf/casci.py +++ b/pyscf/mcscf/casci.py @@ -770,7 +770,7 @@ class CASBase(lib.StreamObject): 'e_tot', 'e_cas', 'ci', 'mo_coeff', 'mo_energy', 'mo_occ', 'converged', } - def __init__(self, mf_or_mol, ncas, nelecas, ncore=None): + def __init__(self, mf_or_mol, ncas=0, nelecas=0, ncore=None): if isinstance(mf_or_mol, gto.Mole): mf = scf.RHF(mf_or_mol) else: @@ -1170,6 +1170,8 @@ def nuc_grad_method(self): from pyscf.grad import casci return casci.Gradients(self) + to_gpu = lib.to_gpu + scf.hf.RHF.CASCI = scf.rohf.ROHF.CASCI = lib.class_as_method(CASCI) scf.uhf.UHF.CASCI = None diff --git a/pyscf/mcscf/casci_symm.py b/pyscf/mcscf/casci_symm.py index a28e1dab9e..df70b31886 100644 --- a/pyscf/mcscf/casci_symm.py +++ b/pyscf/mcscf/casci_symm.py @@ -28,7 +28,7 @@ from pyscf.scf.hf_symm import map_degeneracy class SymAdaptedCASCI(casci.CASCI): - def __init__(self, mf_or_mol, ncas, nelecas, ncore=None): + def __init__(self, mf_or_mol, ncas=0, nelecas=0, ncore=None): casci.CASCI.__init__(self, mf_or_mol, ncas, nelecas, ncore) assert (self.mol.symmetry) @@ -74,6 +74,8 @@ def sort_mo_by_irrep(self, cas_irrep_nocc, return addons.sort_mo_by_irrep(self, mo_coeff, cas_irrep_nocc, cas_irrep_ncore, s) + to_gpu = lib.to_gpu + CASCI = SymAdaptedCASCI def 
eig(mat, orbsym): diff --git a/pyscf/mcscf/mc1step.py b/pyscf/mcscf/mc1step.py index 0744ebb372..4a11427959 100644 --- a/pyscf/mcscf/mc1step.py +++ b/pyscf/mcscf/mc1step.py @@ -755,7 +755,7 @@ class CASSCF(casci.CASBase): 'mo_energy', 'converged', } - def __init__(self, mf_or_mol, ncas, nelecas, ncore=None, frozen=None): + def __init__(self, mf_or_mol, ncas=0, nelecas=0, ncore=None, frozen=None): casci.CASBase.__init__(self, mf_or_mol, ncas, nelecas, ncore) self.frozen = frozen @@ -1296,6 +1296,8 @@ def reset(self, mol=None): casci.CASBase.reset(self, mol=mol) self._max_stepsize = None + to_gpu = lib.to_gpu + scf.hf.RHF.CASSCF = scf.rohf.ROHF.CASSCF = lib.class_as_method(CASSCF) scf.uhf.UHF.CASSCF = None diff --git a/pyscf/mcscf/newton_casscf_symm.py b/pyscf/mcscf/newton_casscf_symm.py index c8419933e1..eb526337b3 100644 --- a/pyscf/mcscf/newton_casscf_symm.py +++ b/pyscf/mcscf/newton_casscf_symm.py @@ -27,7 +27,7 @@ class CASSCF(newton_casscf.CASSCF): __doc__ = newton_casscf.CASSCF.__doc__ - def __init__(self, mf_or_mol, ncas, nelecas, ncore=None, frozen=None): + def __init__(self, mf_or_mol, ncas=0, nelecas=0, ncore=None, frozen=None): newton_casscf.CASSCF.__init__(self, mf_or_mol, ncas, nelecas, ncore, frozen) assert (self.mol.symmetry) self.fcisolver = fci.solver(self.mol, False, True) diff --git a/pyscf/mcscf/ucasci.py b/pyscf/mcscf/ucasci.py index 307f9e8a9c..1c15c25e42 100644 --- a/pyscf/mcscf/ucasci.py +++ b/pyscf/mcscf/ucasci.py @@ -119,7 +119,7 @@ def kernel(casci, mo_coeff=None, ci0=None, verbose=logger.NOTE, envs=None): class UCASBase(CASBase): # nelecas is tuple of (nelecas_alpha, nelecas_beta) - def __init__(self, mf_or_mol, ncas, nelecas, ncore=None): + def __init__(self, mf_or_mol, ncas=0, nelecas=0, ncore=None): #assert ('UHF' == mf.__class__.__name__) if isinstance(mf_or_mol, gto.Mole): mf = scf.UHF(mf_or_mol) diff --git a/pyscf/mcscf/umc1step.py b/pyscf/mcscf/umc1step.py index 53b7d1022a..bb8b8778a9 100644 --- a/pyscf/mcscf/umc1step.py +++ 
b/pyscf/mcscf/umc1step.py @@ -379,7 +379,7 @@ class UCASSCF(ucasci.UCASBase): 'canonicalization', 'sorting_mo_energy', } - def __init__(self, mf_or_mol, ncas, nelecas, ncore=None, frozen=None): + def __init__(self, mf_or_mol, ncas=0, nelecas=0, ncore=None, frozen=None): ucasci.UCASBase.__init__(self, mf_or_mol, ncas, nelecas, ncore) self.frozen = frozen diff --git a/pyscf/mp/dfmp2.py b/pyscf/mp/dfmp2.py index d8cffdd57a..6522b21c19 100644 --- a/pyscf/mp/dfmp2.py +++ b/pyscf/mp/dfmp2.py @@ -140,6 +140,8 @@ def update_amps(self, t2, eris): def init_amps(self, mo_energy=None, mo_coeff=None, eris=None, with_t2=WITH_T2): return kernel(self, mo_energy, mo_coeff, eris, with_t2) + to_gpu = lib.to_gpu + MP2 = DFMP2 from pyscf import scf diff --git a/pyscf/mp/dfmp2_native.py b/pyscf/mp/dfmp2_native.py index 92a2ef0627..800c85d102 100644 --- a/pyscf/mp/dfmp2_native.py +++ b/pyscf/mp/dfmp2_native.py @@ -236,6 +236,8 @@ def kernel(self): def nuc_grad_method(self): raise NotImplementedError + to_gpu = lib.to_gpu + MP2 = RMP2 = DFMP2 = DFRMP2 diff --git a/pyscf/mp/dfump2_native.py b/pyscf/mp/dfump2_native.py index 5a3a3b0db8..a99f7dfb52 100644 --- a/pyscf/mp/dfump2_native.py +++ b/pyscf/mp/dfump2_native.py @@ -226,6 +226,8 @@ def delete(self): def nuc_grad_method(self): raise NotImplementedError + to_gpu = lib.to_gpu + MP2 = UMP2 = DFMP2 = DFUMP2 diff --git a/pyscf/mp/gmp2.py b/pyscf/mp/gmp2.py index 7fae2ffdd0..8f068df51b 100644 --- a/pyscf/mp/gmp2.py +++ b/pyscf/mp/gmp2.py @@ -214,6 +214,8 @@ def nuc_grad_method(self): def init_amps(self, mo_energy=None, mo_coeff=None, eris=None, with_t2=WITH_T2): return kernel(self, mo_energy, mo_coeff, eris, with_t2) + to_gpu = lib.to_gpu + MP2 = GMP2 scf.ghf.GHF.MP2 = lib.class_as_method(MP2) diff --git a/pyscf/mp/mp2.py b/pyscf/mp/mp2.py index 0955a18bb7..9d1dc431a5 100644 --- a/pyscf/mp/mp2.py +++ b/pyscf/mp/mp2.py @@ -649,6 +649,8 @@ def nuc_grad_method(self): def init_amps(self, mo_energy=None, mo_coeff=None, eris=None, with_t2=WITH_T2): 
return kernel(self, mo_energy, mo_coeff, eris, with_t2) + to_gpu = lib.to_gpu + RMP2 = MP2 from pyscf import scf diff --git a/pyscf/mp/ump2.py b/pyscf/mp/ump2.py index adbfc3b278..70c5bd8804 100644 --- a/pyscf/mp/ump2.py +++ b/pyscf/mp/ump2.py @@ -450,6 +450,8 @@ def nuc_grad_method(self): def init_amps(self, mo_energy=None, mo_coeff=None, eris=None, with_t2=WITH_T2): return kernel(self, mo_energy, mo_coeff, eris, with_t2) + to_gpu = lib.to_gpu + MP2 = UMP2 from pyscf import scf diff --git a/pyscf/pbc/cc/kccsd.py b/pyscf/pbc/cc/kccsd.py index 3fa54e7024..b837bb9212 100644 --- a/pyscf/pbc/cc/kccsd.py +++ b/pyscf/pbc/cc/kccsd.py @@ -463,6 +463,8 @@ def from_uccsd(self, t1, t2, orbspin=None): def to_uccsd(self, t1, t2, orbspin=None): return spin2spatial(t1, orbspin), spin2spatial(t2, orbspin) + to_gpu = lib.to_gpu + CCSD = KCCSD = KGCCSD = GCCSD diff --git a/pyscf/pbc/cc/kccsd_rhf.py b/pyscf/pbc/cc/kccsd_rhf.py index 4d100bdd7d..e4b3fb92c5 100644 --- a/pyscf/pbc/cc/kccsd_rhf.py +++ b/pyscf/pbc/cc/kccsd_rhf.py @@ -653,6 +653,8 @@ def eaccsd(self, nroots=1, left=False, koopmans=False, guess=None, def ao2mo(self, mo_coeff=None): return _ERIS(self, mo_coeff) + to_gpu = lib.to_gpu + ##################################### # Wrapper functions for IP/EA-EOM ##################################### diff --git a/pyscf/pbc/cc/kccsd_uhf.py b/pyscf/pbc/cc/kccsd_uhf.py index fbac93bf14..df8d580606 100644 --- a/pyscf/pbc/cc/kccsd_uhf.py +++ b/pyscf/pbc/cc/kccsd_uhf.py @@ -761,6 +761,8 @@ def vector_to_amplitudes(self, vec, nmo=None, nocc=None, nkpts=None): if nkpts is None: nkpts = self.nkpts return vector_to_amplitudes(vec, nmo, nocc, nkpts) + to_gpu = lib.to_gpu + UCCSD = KUCCSD diff --git a/pyscf/pbc/df/df.py b/pyscf/pbc/df/df.py index a03d058109..55d5bd7f16 100644 --- a/pyscf/pbc/df/df.py +++ b/pyscf/pbc/df/df.py @@ -524,6 +524,8 @@ def get_naoaux(self): naux += dat.shape[0] return naux + to_gpu = lib.to_gpu + DF = GDF class CDERIArray: diff --git a/pyscf/pbc/df/fft.py 
b/pyscf/pbc/df/fft.py index 8a1b982239..1d538ae4ee 100644 --- a/pyscf/pbc/df/fft.py +++ b/pyscf/pbc/df/fft.py @@ -355,3 +355,5 @@ def get_naoaux(self): return ngrids * 2 range_coulomb = aft.AFTDF.range_coulomb + + to_gpu = lib.to_gpu diff --git a/pyscf/pbc/dft/gen_grid.py b/pyscf/pbc/dft/gen_grid.py index 1cc4d9fa91..840a237448 100644 --- a/pyscf/pbc/dft/gen_grid.py +++ b/pyscf/pbc/dft/gen_grid.py @@ -134,6 +134,8 @@ def make_mask(self, cell=None, coords=None, relativity=0, shls_slice=None, if coords is None: coords = self.coords return make_mask(cell, coords, relativity, shls_slice, verbose) + to_gpu = lib.to_gpu + # modified from pyscf.dft.gen_grid.gen_partition def get_becke_grids(cell, atom_grid={}, radi_method=dft.radi.gauss_chebyshev, @@ -257,6 +259,8 @@ def make_mask(self, cell=None, coords=None, relativity=0, shls_slice=None, if coords is None: coords = self.coords return make_mask(cell, coords, relativity, shls_slice, verbose) + to_gpu = lib.to_gpu + AtomicGrids = BeckeGrids diff --git a/pyscf/pbc/dft/gks.py b/pyscf/pbc/dft/gks.py index 5536b53daa..a38a57a22b 100644 --- a/pyscf/pbc/dft/gks.py +++ b/pyscf/pbc/dft/gks.py @@ -143,3 +143,5 @@ def to_hf(self): '''Convert to GHF object.''' from pyscf.pbc import scf return self._transfer_attrs_(scf.GHF(self.cell, self.kpt)) + + to_gpu = lib.to_gpu diff --git a/pyscf/pbc/dft/kgks.py b/pyscf/pbc/dft/kgks.py index fd97e43cd1..7774f2e36e 100644 --- a/pyscf/pbc/dft/kgks.py +++ b/pyscf/pbc/dft/kgks.py @@ -148,3 +148,5 @@ def to_hf(self): '''Convert to KGHF object.''' from pyscf.pbc import scf return self._transfer_attrs_(scf.KGHF(self.cell, self.kpts)) + + to_gpu = lib.to_gpu diff --git a/pyscf/pbc/dft/krks.py b/pyscf/pbc/dft/krks.py index 3cd23636b1..6278605de9 100644 --- a/pyscf/pbc/dft/krks.py +++ b/pyscf/pbc/dft/krks.py @@ -184,6 +184,8 @@ def to_hf(self): from pyscf.pbc import scf return self._transfer_attrs_(scf.KRHF(self.cell, self.kpts)) + to_gpu = lib.to_gpu + if __name__ == '__main__': from pyscf.pbc import 
gto diff --git a/pyscf/pbc/dft/kroks.py b/pyscf/pbc/dft/kroks.py index e83d8e6d3e..1d2a1198f8 100644 --- a/pyscf/pbc/dft/kroks.py +++ b/pyscf/pbc/dft/kroks.py @@ -64,6 +64,8 @@ def to_hf(self): from pyscf.pbc import scf return self._transfer_attrs_(scf.KROHF(self.cell, self.kpts)) + to_gpu = lib.to_gpu + if __name__ == '__main__': from pyscf.pbc import gto diff --git a/pyscf/pbc/dft/kuks.py b/pyscf/pbc/dft/kuks.py index 634c99f8ff..ac66f973bc 100644 --- a/pyscf/pbc/dft/kuks.py +++ b/pyscf/pbc/dft/kuks.py @@ -160,6 +160,8 @@ def to_hf(self): from pyscf.pbc import scf return self._transfer_attrs_(scf.KUHF(self.cell, self.kpts)) + to_gpu = lib.to_gpu + if __name__ == '__main__': from pyscf.pbc import gto diff --git a/pyscf/pbc/dft/numint.py b/pyscf/pbc/dft/numint.py index 7d36fffb6a..056349f88e 100644 --- a/pyscf/pbc/dft/numint.py +++ b/pyscf/pbc/dft/numint.py @@ -1082,6 +1082,8 @@ def eval_rho1(self, cell, ao, dm, screen_index=None, xctype='LDA', hermi=0, return self.eval_rho(cell, ao, dm, screen_index, xctype, hermi, with_lapl, verbose) + to_gpu = lib.to_gpu + _NumInt = NumInt @@ -1287,4 +1289,6 @@ def make_rho(idm, ao_kpts, non0tab, xctype): cache_xc_kernel1 = cache_xc_kernel1 get_rho = get_rho + to_gpu = lib.to_gpu + _KNumInt = KNumInt diff --git a/pyscf/pbc/dft/rks.py b/pyscf/pbc/dft/rks.py index d3dc8d1047..bf12735aa6 100644 --- a/pyscf/pbc/dft/rks.py +++ b/pyscf/pbc/dft/rks.py @@ -346,6 +346,8 @@ def to_hf(self): from pyscf.pbc import scf return self._transfer_attrs_(scf.RHF(self.cell, self.kpt)) + to_gpu = lib.to_gpu + if __name__ == '__main__': from pyscf.pbc import gto diff --git a/pyscf/pbc/dft/roks.py b/pyscf/pbc/dft/roks.py index f7fe097c10..d422cfc3cb 100644 --- a/pyscf/pbc/dft/roks.py +++ b/pyscf/pbc/dft/roks.py @@ -68,6 +68,8 @@ def to_hf(self): from pyscf.pbc import scf return self._transfer_attrs_(scf.ROHF(self.cell, self.kpt)) + to_gpu = lib.to_gpu + if __name__ == '__main__': from pyscf.pbc import gto diff --git a/pyscf/pbc/dft/uks.py 
b/pyscf/pbc/dft/uks.py index 20d8d14c71..9619ec4d25 100644 --- a/pyscf/pbc/dft/uks.py +++ b/pyscf/pbc/dft/uks.py @@ -144,6 +144,8 @@ def to_hf(self): from pyscf.pbc import scf return self._transfer_attrs_(scf.UHF(self.cell, self.kpt)) + to_gpu = lib.to_gpu + if __name__ == '__main__': from pyscf.pbc import gto diff --git a/pyscf/pbc/mp/kmp2.py b/pyscf/pbc/mp/kmp2.py index 9b7a53b2bb..bcea49ca41 100644 --- a/pyscf/pbc/mp/kmp2.py +++ b/pyscf/pbc/mp/kmp2.py @@ -783,6 +783,8 @@ def kernel(self, mo_energy=None, mo_coeff=None, with_t2=WITH_T2): return self.e_corr, self.t2 + to_gpu = lib.to_gpu + KRMP2 = KMP2 diff --git a/pyscf/pbc/scf/ghf.py b/pyscf/pbc/scf/ghf.py index ddff1b850f..e5f12dc901 100644 --- a/pyscf/pbc/scf/ghf.py +++ b/pyscf/pbc/scf/ghf.py @@ -165,6 +165,8 @@ def convert_from_(self, mf): addons.convert_to_ghf(mf, self) return self + to_gpu = lib.to_gpu + if __name__ == '__main__': from pyscf.pbc import gto diff --git a/pyscf/pbc/scf/hf.py b/pyscf/pbc/scf/hf.py index f6c91336ed..3b5de33a5f 100644 --- a/pyscf/pbc/scf/hf.py +++ b/pyscf/pbc/scf/hf.py @@ -897,6 +897,7 @@ class RHF(SCF): analyze = mol_hf.RHF.analyze spin_square = mol_hf.RHF.spin_square stability = mol_hf.RHF.stability + to_gpu = lib.to_gpu def nuc_grad_method(self): raise NotImplementedError diff --git a/pyscf/pbc/scf/kghf.py b/pyscf/pbc/scf/kghf.py index f271bd28da..2ce492067c 100644 --- a/pyscf/pbc/scf/kghf.py +++ b/pyscf/pbc/scf/kghf.py @@ -200,6 +200,8 @@ def __init__(self, cell, kpts=np.zeros((1,3)), analyze = khf.analyze convert_from_ = pbcghf.GHF.convert_from_ + to_gpu = lib.to_gpu + def get_hcore(self, cell=None, kpts=None): hcore = khf.KSCF.get_hcore(self, cell, kpts) hcore = lib.asarray([scipy.linalg.block_diag(h, h) for h in hcore]) diff --git a/pyscf/pbc/scf/khf.py b/pyscf/pbc/scf/khf.py index 89124e8af4..49dff73d2b 100644 --- a/pyscf/pbc/scf/khf.py +++ b/pyscf/pbc/scf/khf.py @@ -688,6 +688,7 @@ class KRHF(KSCF): analyze = analyze spin_square = mol_hf.RHF.spin_square + to_gpu = 
lib.to_gpu def check_sanity(self): cell = self.cell diff --git a/pyscf/pbc/scf/krohf.py b/pyscf/pbc/scf/krohf.py index 6a23588fb7..b6b6da83a3 100644 --- a/pyscf/pbc/scf/krohf.py +++ b/pyscf/pbc/scf/krohf.py @@ -274,6 +274,7 @@ class KROHF(khf.KRHF): analyze = khf.analyze spin_square = pbcrohf.ROHF.spin_square canonicalize = canonicalize + to_gpu = lib.to_gpu def __init__(self, cell, kpts=np.zeros((1,3)), exxdiv=getattr(__config__, 'pbc_scf_SCF_exxdiv', 'ewald')): diff --git a/pyscf/pbc/scf/kuhf.py b/pyscf/pbc/scf/kuhf.py index eae04c0713..ff911afac4 100644 --- a/pyscf/pbc/scf/kuhf.py +++ b/pyscf/pbc/scf/kuhf.py @@ -384,6 +384,7 @@ class KUHF(khf.KSCF): get_rho = get_rho analyze = khf.analyze canonicalize = canonicalize + to_gpu = lib.to_gpu def __init__(self, cell, kpts=np.zeros((1,3)), exxdiv=getattr(__config__, 'pbc_scf_SCF_exxdiv', 'ewald')): diff --git a/pyscf/pbc/scf/rohf.py b/pyscf/pbc/scf/rohf.py index a34fade115..d45a507982 100644 --- a/pyscf/pbc/scf/rohf.py +++ b/pyscf/pbc/scf/rohf.py @@ -62,6 +62,7 @@ class ROHF(pbchf.RHF): spin_square = mol_rohf.ROHF.spin_square stability = mol_rohf.ROHF.stability dip_moment = pbchf.SCF.dip_moment + to_gpu = lib.to_gpu def __init__(self, cell, kpt=np.zeros(3), exxdiv=getattr(__config__, 'pbc_scf_SCF_exxdiv', 'ewald')): diff --git a/pyscf/pbc/scf/rsjk.py b/pyscf/pbc/scf/rsjk.py index 3e15d09162..0efe64e822 100644 --- a/pyscf/pbc/scf/rsjk.py +++ b/pyscf/pbc/scf/rsjk.py @@ -1160,6 +1160,8 @@ def merge_dd(Gpq, p0, p1, ki_lst, kj_lst): log.timer_debug1('get_lr_k_kpts', *cpu0) return vk_kpts + to_gpu = lib.to_gpu + RangeSeparationJKBuilder = RangeSeparatedJKBuilder def _purify(mat_kpts, phase): diff --git a/pyscf/pbc/scf/uhf.py b/pyscf/pbc/scf/uhf.py index 0d247f745e..3dfd53866b 100644 --- a/pyscf/pbc/scf/uhf.py +++ b/pyscf/pbc/scf/uhf.py @@ -128,6 +128,7 @@ class UHF(pbchf.SCF): canonicalize = mol_uhf.UHF.canonicalize spin_square = mol_uhf.UHF.spin_square stability = mol_uhf.UHF.stability + to_gpu = lib.to_gpu def 
__init__(self, cell, kpt=np.zeros(3), exxdiv=getattr(__config__, 'pbc_scf_SCF_exxdiv', 'ewald')): diff --git a/pyscf/qmmm/itrf.py b/pyscf/qmmm/itrf.py index 3bb79202fb..f787f03309 100644 --- a/pyscf/qmmm/itrf.py +++ b/pyscf/qmmm/itrf.py @@ -188,6 +188,11 @@ def energy_nuc(self): nuc += q2*(charges/r).sum() return nuc + def to_gpu(self): + obj = self.undo_qmmm().to_gpu() + obj = qmmm_for_scf(obj, self.mm_mol) + return lib.to_gpu(self, obj) + def nuc_grad_method(self): scf_grad = super().nuc_grad_method() return qmmm_grad_for_scf(scf_grad) @@ -207,6 +212,8 @@ def undo_qmmm(self): obj._scf = self._scf.undo_qmmm() return obj + to_gpu = QMMMSCF.to_gpu + def add_mm_charges_grad(scf_grad, atoms_or_coords, charges, radii=None, unit=None): '''Apply the MM charges in the QM gradients' method. It affects both the @@ -396,6 +403,11 @@ def grad_nuc_mm(self, mol=None): g_mm += q1 * numpy.einsum('i,ix,i->ix', charges, r1-coords, 1/r**3) return g_mm + def to_gpu(self): + obj = self.undo_qmmm().to_gpu() + obj = qmmm_grad_for_scf(obj) + return lib.to_gpu(self, obj) + _QMMMGrad = QMMMGrad # Inject QMMM interface wrapper to other modules diff --git a/pyscf/scf/dhf.py b/pyscf/scf/dhf.py index 6e29d5a450..099074c71b 100644 --- a/pyscf/scf/dhf.py +++ b/pyscf/scf/dhf.py @@ -454,12 +454,11 @@ class DHF(hf.SCF): ssss_approx = getattr(__config__, 'scf_dhf_SCF_ssss_approx', 'Visscher') _keys = {'conv_tol', 'with_ssss', 'with_gaunt', - 'with_breit', 'ssss_approx', 'opt'} + 'with_breit', 'ssss_approx'} def __init__(self, mol): hf.SCF.__init__(self, mol) self._coulomb_level = 'SSSS' # 'SSSS' ~ LLLL+LLSS+SSSS - self.opt = None # (opt_llll, opt_ssll, opt_ssss, opt_gaunt) def dump_flags(self, verbose=None): hf.SCF.dump_flags(self, verbose) @@ -518,8 +517,6 @@ def init_guess_by_chkfile(self, chkfile=None, project=None): def build(self, mol=None): if self.verbose >= logger.WARN: self.check_sanity() - if self.direct_scf: - self.opt = self.init_direct_scf(mol) return self def get_occ(self, 
mo_energy=None, mo_coeff=None): @@ -686,7 +683,6 @@ def reset(self, mol=None): self.mol = mol self._coulomb_level = 'SSSS' # 'SSSS' ~ LLLL+LLSS+SSSS self._opt = {None: None} - self.opt = None # (opt_llll, opt_ssll, opt_ssss, opt_gaunt) return self def stability(self, internal=None, external=None, verbose=None, return_status=False): @@ -749,6 +745,8 @@ def to_dks(self, xc='HF'): to_ks = to_dks + to_gpu = lib.to_gpu + UHF = UDHF = DHF diff --git a/pyscf/scf/diis.py b/pyscf/scf/diis.py index a442f58b9c..0eea4da094 100644 --- a/pyscf/scf/diis.py +++ b/pyscf/scf/diis.py @@ -44,10 +44,6 @@ def __init__(self, mf=None, filename=None, Corth=None): self.space = 8 self.Corth = Corth self.damp = 0 - #?self._scf = mf - #?if hasattr(self._scf, 'get_orbsym'): # Symmetry adapted SCF objects - #? self.orbsym = mf.get_orbsym(Corth) - #? sym_forbid = self.orbsym[:,None] != self.orbsym def update(self, s, d, f, *args, **kwargs): errvec = get_err_vec(s, d, f, self.Corth) diff --git a/pyscf/scf/ghf.py b/pyscf/scf/ghf.py index 08ac67603d..ec35bed657 100644 --- a/pyscf/scf/ghf.py +++ b/pyscf/scf/ghf.py @@ -382,16 +382,14 @@ class GHF(hf.SCF): mo_coeff[nao:nao*2] are the coefficients of AO with beta spin. 
''' + with_soc = None + _keys = {'with_soc'} get_init_guess = hf.RHF.get_init_guess get_occ = get_occ _finalize = uhf.UHF._finalize - def __init__(self, mol): - hf.SCF.__init__(self, mol) - self.with_soc = None - def get_hcore(self, mol=None): if mol is None: mol = self.mol hcore = hf.get_hcore(mol) @@ -541,9 +539,7 @@ def to_ks(self, xc='HF'): from pyscf import dft return self._transfer_attrs_(dft.GKS(self.mol, xc=xc)) - def to_gpu(self): - from gpu4pyscf.scf import GHF - return lib.to_gpu(hf.SCF.reset(self.view(GHF))) + to_gpu = lib.to_gpu def _from_rhf_init_dm(dm, breaksym=True): dma = dm * .5 diff --git a/pyscf/scf/ghf_symm.py b/pyscf/scf/ghf_symm.py index 8dc9a29520..3b419bff62 100644 --- a/pyscf/scf/ghf_symm.py +++ b/pyscf/scf/ghf_symm.py @@ -281,8 +281,7 @@ def get_orbsym(self, mo_coeff=None, s=None): return numpy.asarray(get_orbsym(self.mol, mo_coeff, s)) orbsym = property(get_orbsym) - def to_gpu(self): - raise NotImplementedError + to_gpu = lib.to_gpu GHF = SymAdaptedGHF diff --git a/pyscf/scf/hf.py b/pyscf/scf/hf.py index 7a8c0e8f22..7119745708 100644 --- a/pyscf/scf/hf.py +++ b/pyscf/scf/hf.py @@ -1504,6 +1504,7 @@ class SCF(lib.StreamObject): conv_tol_grad = getattr(__config__, 'scf_hf_SCF_conv_tol_grad', None) max_cycle = getattr(__config__, 'scf_hf_SCF_max_cycle', 50) init_guess = getattr(__config__, 'scf_hf_SCF_init_guess', 'minao') + disp = None # for DFT-D3 and DFT-D4 # To avoid diis pollution from previous run, self.diis should not be # initialized as DIIS instance here @@ -1542,7 +1543,6 @@ def __init__(self, mol): self.verbose = mol.verbose self.max_memory = mol.max_memory self.stdout = mol.stdout - self.disp = None # If chkfile is muted, SCF intermediates will not be dumped anywhere. if MUTE_CHKFILE: @@ -2062,8 +2062,12 @@ def _transfer_attrs_(self, dst): '''This helper function transfers attributes from one SCF object to another SCF object. It is invoked by to_ks and to_hf methods. 
''' + # Search for all tracked attributes, including those in base classes + cls_keys = [getattr(cls, '_keys', ()) for cls in dst.__class__.__mro__[:-1]] + dst_keys = set(dst.__dict__).union(*cls_keys) + loc_dic = self.__dict__ - keys = dst.__dict__.keys() & loc_dic.keys() + keys = set(loc_dic).intersection(dst_keys) dst.__dict__.update({k: loc_dic[k] for k in keys}) dst.converged = False return dst @@ -2197,11 +2201,8 @@ def to_ks(self, xc='HF'): from pyscf import dft return self._transfer_attrs_(dft.RKS(self.mol, xc=xc)) - def to_gpu(self): - # FIXME: consider the density_fit, x2c and soscf decoration - from gpu4pyscf.scf import RHF - obj = SCF.reset(self.view(RHF)) - return lib.to_gpu(obj) + # FIXME: consider the density_fit, x2c and soscf decoration + to_gpu = lib.to_gpu def _hf1e_scf(mf, *args): logger.info(mf, '\n') diff --git a/pyscf/scf/hf_symm.py b/pyscf/scf/hf_symm.py index 5da8d7cd2a..966b1bcbaa 100644 --- a/pyscf/scf/hf_symm.py +++ b/pyscf/scf/hf_symm.py @@ -573,8 +573,7 @@ def get_orbsym(self, mo_coeff=None, s=None): canonicalize = canonicalize - def to_gpu(self): - raise NotImplementedError + to_gpu = lib.to_gpu RHF = SymAdaptedRHF @@ -934,8 +933,7 @@ def canonicalize(self, mo_coeff, mo_occ, fock=None): get_wfnsym = get_wfnsym wfnsym = property(get_wfnsym) - def to_gpu(self): - raise NotImplementedError + to_gpu = lib.to_gpu ROHF = SymAdaptedROHF diff --git a/pyscf/scf/rohf.py b/pyscf/scf/rohf.py index 951e08a526..5ea9a6dc28 100644 --- a/pyscf/scf/rohf.py +++ b/pyscf/scf/rohf.py @@ -520,9 +520,7 @@ def to_ks(self, xc='HF'): from pyscf import dft return self._transfer_attrs_(dft.ROKS(self.mol, xc=xc)) - def to_gpu(self): - from gpu4pyscf.scf import ROHF - return lib.to_gpu(hf.SCF.reset(self.view(ROHF))) + to_gpu = lib.to_gpu class HF1e(ROHF): diff --git a/pyscf/scf/uhf.py b/pyscf/scf/uhf.py index 0afc66d0ba..0ae13f1eb6 100644 --- a/pyscf/scf/uhf.py +++ b/pyscf/scf/uhf.py @@ -763,6 +763,8 @@ class UHF(hf.SCF): S^2 = 0.7570150, 2S+1 = 2.0070027 ''' + 
init_guess_breaksym = None + _keys = {"init_guess_breaksym"} def __init__(self, mol): @@ -771,7 +773,6 @@ def __init__(self, mol): # self.mo_occ => [mo_occ_a, mo_occ_b] # self.mo_energy => [mo_energy_a, mo_energy_b] self.nelec = None - self.init_guess_breaksym = None @property def nelec(self): @@ -1066,9 +1067,7 @@ def to_ks(self, xc='HF'): from pyscf import dft return self._transfer_attrs_(dft.UKS(self.mol, xc=xc)) - def to_gpu(self): - from gpu4pyscf.scf import UHF - return lib.to_gpu(hf.SCF.reset(self.view(UHF))) + to_gpu = lib.to_gpu def _hf1e_scf(mf, *args): logger.info(mf, '\n') diff --git a/pyscf/scf/uhf_symm.py b/pyscf/scf/uhf_symm.py index 1ea38b0956..7253fd96ea 100644 --- a/pyscf/scf/uhf_symm.py +++ b/pyscf/scf/uhf_symm.py @@ -565,8 +565,7 @@ def get_orbsym(self, mo_coeff=None, s=None): canonicalize = canonicalize - def to_gpu(self): - raise NotImplementedError + to_gpu = lib.to_gpu UHF = SymAdaptedUHF diff --git a/pyscf/sgx/sgx.py b/pyscf/sgx/sgx.py index 25f05e2e9e..c004745117 100644 --- a/pyscf/sgx/sgx.py +++ b/pyscf/sgx/sgx.py @@ -214,6 +214,9 @@ def post_kernel(self, envs): self._last_vj = 0 self._last_vk = 0 + def to_gpu(self): + raise NotImplementedError + def method_not_implemented(self, *args, **kwargs): raise NotImplementedError nuc_grad_method = Gradients = method_not_implemented @@ -374,3 +377,5 @@ def get_jk(self, dm, hermi=1, vhfopt=None, with_j=True, with_k=True, else: vj, vk = sgx_jk.get_jk(self, dm, hermi, with_j, with_k, direct_scf_tol) return vj, vk + + to_gpu = lib.to_gpu diff --git a/pyscf/solvent/_attach_solvent.py b/pyscf/solvent/_attach_solvent.py index 1f98b9677d..8e0b6b177a 100644 --- a/pyscf/solvent/_attach_solvent.py +++ b/pyscf/solvent/_attach_solvent.py @@ -147,6 +147,11 @@ def stability(self, *args, **kwargs): equilibrium_solvation=not self.with_solvent.frozen): return super().stability(*args, **kwargs) + def to_gpu(self): + obj = self.undo_solvent().to_gpu() + obj = _for_scf(obj, self.with_solvent) + return lib.to_gpu(self, 
obj) + def _for_casscf(mc, solvent_obj, dm=None): '''Add solvent model to CASSCF method. @@ -284,6 +289,11 @@ def nuc_grad_method(self): Gradients = nuc_grad_method + def to_gpu(self): + obj = self.undo_solvent().to_gpu() + obj = _for_casscf(obj, self.with_solvent) + return lib.to_gpu(self, obj) + def _for_casci(mc, solvent_obj, dm=None): '''Add solvent model to CASCI method. @@ -421,6 +431,11 @@ def nuc_grad_method(self): Gradients = nuc_grad_method + def to_gpu(self): + obj = self.undo_solvent().to_gpu() + obj = _for_casci(obj, self.with_solvent) + return lib.to_gpu(self, obj) + def _for_post_scf(method, solvent_obj, dm=None): '''A wrapper of solvent model for post-SCF methods (CC, CI, MP etc.) @@ -552,6 +567,11 @@ def nuc_grad_method(self): Gradients = nuc_grad_method + def to_gpu(self): + obj = self.undo_solvent().to_gpu() + obj = _for_post_scf(obj, self.with_solvent) + return lib.to_gpu(self, obj) + def _for_tdscf(method, solvent_obj, dm=None): '''Add solvent model in TDDFT calculations. 
@@ -583,7 +603,7 @@ def _for_tdscf(method, solvent_obj, dm=None): class TDSCFWithSolvent(_Solvation): _keys = {'with_solvent'} - def __init__(self, method, scf_with_solvent): + def __init__(self, method, scf_with_solvent=None): self.__dict__.update(method.__dict__) self._scf = scf_with_solvent self.with_solvent = self._scf.with_solvent @@ -630,3 +650,8 @@ def get_ab(self, mf=None): def nuc_grad_method(self): grad_method = super().nuc_grad_method() return self.with_solvent.nuc_grad_method(grad_method) + + def to_gpu(self): + obj = self.undo_solvent().to_gpu() + obj = _for_tdscf(obj, self.with_solvent) + return lib.to_gpu(self, obj) diff --git a/pyscf/solvent/ddcosmo.py b/pyscf/solvent/ddcosmo.py index e0293aae8f..88062c2b48 100644 --- a/pyscf/solvent/ddcosmo.py +++ b/pyscf/solvent/ddcosmo.py @@ -869,6 +869,8 @@ def nuc_grad_method(self, grad_method): else: return ddcosmo_grad.make_grad_object(grad_method) + to_gpu = lib.to_gpu + DDCOSMO = ddCOSMO class Grids(gen_grid.Grids): diff --git a/pyscf/soscf/newton_ah.py b/pyscf/soscf/newton_ah.py index dca21e6f57..8a2a2dd0d7 100644 --- a/pyscf/soscf/newton_ah.py +++ b/pyscf/soscf/newton_ah.py @@ -800,6 +800,9 @@ def rotate_mo(self, mo_coeff, u, log=None): _effective_svd(u[idx][:,idx], 1e-5)) return mo + def to_gpu(self): + return self.undo_soscf().to_gpu() + class _SecondOrderROHF(_CIAH_SOSCF): gen_g_hop = gen_g_hop_rohf diff --git a/pyscf/tdscf/rhf.py b/pyscf/tdscf/rhf.py index 265d37d110..c90fac184b 100644 --- a/pyscf/tdscf/rhf.py +++ b/pyscf/tdscf/rhf.py @@ -764,6 +764,9 @@ def _finalize(self): logger.note(self, 'Excited State energies (eV)\n%s', self.e * nist.HARTREE2EV) return self + def to_gpu(self): + raise NotImplementedError + class TDA(TDBase): '''Tamm-Dancoff approximation @@ -866,6 +869,8 @@ def pickeig(w, v, nroots, envs): self._finalize() return self.e, self.xy + to_gpu = lib.to_gpu + CIS = TDA @@ -1044,6 +1049,8 @@ def nuc_grad_method(self): from pyscf.grad import tdrhf return tdrhf.Gradients(self) + to_gpu = 
lib.to_gpu + RPA = TDRHF = TDHF scf.hf.RHF.TDA = lib.class_as_method(TDA) diff --git a/pyscf/tdscf/uhf.py b/pyscf/tdscf/uhf.py index 398b777753..17554d1044 100644 --- a/pyscf/tdscf/uhf.py +++ b/pyscf/tdscf/uhf.py @@ -690,6 +690,8 @@ def pickeig(w, v, nroots, envs): self._finalize() return self.e, self.xy + to_gpu = lib.to_gpu + CIS = TDA @@ -858,6 +860,8 @@ def pickeig(w, v, nroots, envs): self._finalize() return self.e, self.xy + to_gpu = lib.to_gpu + RPA = TDUHF = TDHF scf.uhf.UHF.TDA = lib.class_as_method(TDA) diff --git a/pyscf/x2c/sfx2c1e.py b/pyscf/x2c/sfx2c1e.py index 6b6d8f8334..c3fde295c1 100644 --- a/pyscf/x2c/sfx2c1e.py +++ b/pyscf/x2c/sfx2c1e.py @@ -154,6 +154,10 @@ def _transfer_attrs_(self, dst): dst = dst.sfx2c() return hf.SCF._transfer_attrs_(self, dst) + def to_gpu(self): + obj = self.undo_x2c().to_gpu().sfx2c1e() + return lib.to_gpu(self, obj) + class SpinFreeX2CHelper(x2c.X2CHelperBase): '''1-component X2c (spin-free part only) diff --git a/pyscf/x2c/x2c.py b/pyscf/x2c/x2c.py index bd2eee5fed..29a6517b36 100644 --- a/pyscf/x2c/x2c.py +++ b/pyscf/x2c/x2c.py @@ -659,6 +659,8 @@ def to_ks(self, xc='HF'): from pyscf.x2c import dft return self._transfer_attrs_(dft.UKS(self.mol, xc=xc)) + to_gpu = lib.to_gpu + X2C_UHF = UHF class RHF(SCF): @@ -680,6 +682,8 @@ def to_ks(self, xc='HF'): from pyscf.x2c import dft return self._transfer_attrs_(dft.RKS(self.mol, xc=xc)) + to_gpu = lib.to_gpu + X2C_RHF = RHF def x2c1e_ghf(mf): @@ -799,6 +803,10 @@ def _transfer_attrs_(self, dst): def to_ks(self, xc='HF'): raise NotImplementedError + def to_gpu(self): + obj = self.undo_x2c().to_gpu().x2c1e() + return lib.to_gpu(self, obj) + def _uncontract_mol(mol, xuncontract=None, exp_drop=0.2): '''mol._basis + uncontracted steep functions''' From d3f622d46eef5d9b8702fa6f4577babfb6c2ccfe Mon Sep 17 00:00:00 2001 From: "Junjie, Yang" Date: Fri, 15 Mar 2024 04:26:42 +0800 Subject: [PATCH 23/44] Improve memory usage for RPA (#2115) * improve memory usage for RPA with outcore 
ao2mo; fix small bug * raise something when loading libpbc.get_Gv * fix flake8 issue * fix flake8 issue * used DF-RHF as EXX, changed ref values * unrestricted case * unrestricted case * Restore pyscf/pbc/gto/cell.py --- examples/gw/03-drpa.py | 32 ++++ pyscf/gto/mole.py | 5 +- pyscf/gw/rpa.py | 329 +++++++++++++++++++++++++++----------- pyscf/gw/test/test_gw.py | 5 +- pyscf/gw/test/test_ugw.py | 5 +- pyscf/gw/urpa.py | 262 +++++++++++++++--------------- 6 files changed, 410 insertions(+), 228 deletions(-) create mode 100644 examples/gw/03-drpa.py diff --git a/examples/gw/03-drpa.py b/examples/gw/03-drpa.py new file mode 100644 index 0000000000..1c2596684a --- /dev/null +++ b/examples/gw/03-drpa.py @@ -0,0 +1,32 @@ +''' +Direct RPA correlation energy +''' + +from pyscf import gto, dft, gw + +mol = gto.M( + atom = """ +O 0.48387 -0.41799 -0.63869 +H 0.58103 0.36034 -0.05009 +H 1.01598 -1.09574 -0.18434 +H 0.68517 -2.88004 0.87771 +O 1.59649 -2.63873 0.61189 +H 1.72242 -3.22647 -0.15071 +H -2.47665 1.59686 -0.33246 +O -1.55912 1.35297 -0.13891 +H -1.25777 0.82058 -0.89427 +H -1.87830 -2.91357 -0.21825 +O -1.14269 -2.57648 0.31845 +H -0.81003 -1.77219 -0.15155 +""", + basis = 'ccpvqz', verbose = 5, + ) + +mf = dft.RKS(mol).density_fit() +mf.xc = 'pbe' +mf.kernel() + +import pyscf.gw.rpa +rpa = gw.rpa.dRPA(mf) +rpa.max_memory = 50 +rpa.kernel() diff --git a/pyscf/gto/mole.py b/pyscf/gto/mole.py index 28d8fd444d..9c0a2d2125 100644 --- a/pyscf/gto/mole.py +++ b/pyscf/gto/mole.py @@ -2420,8 +2420,8 @@ def enuc(self): self._enuc = self.energy_nuc() return self._enuc @enuc.setter - def enuc(self, enuc): - self._enuc = enuc + def enuc(self, x): + self._enuc = x copy = copy @@ -2798,6 +2798,7 @@ def dump_input(self): if self.verbose >= logger.INFO: self.stdout.write('\n') logger.info(self, 'nuclear repulsion = %.15g', self.enuc) + if self.symmetry: if self.topgroup == self.groupname: logger.info(self, 'point group symmetry = %s', self.topgroup) diff --git a/pyscf/gw/rpa.py 
b/pyscf/gw/rpa.py index 8304432298..c0d0bc306a 100755 --- a/pyscf/gw/rpa.py +++ b/pyscf/gw/rpa.py @@ -26,7 +26,8 @@ X. Ren et al., New J. Phys. 14, 053020 (2012) """ -import numpy as np +import numpy as np, scipy + from pyscf import lib from pyscf.lib import logger from pyscf.ao2mo import _ao2mo @@ -36,88 +37,123 @@ einsum = lib.einsum # **************************************************************************** -# core routines, kernel, rpa_ecorr, rho_response +# core routines kernel # **************************************************************************** -def kernel(rpa, mo_energy, mo_coeff, Lpq=None, nw=40, x0=0.5, verbose=logger.NOTE): +def kernel(rpa, mo_energy, mo_coeff, cderi_ov=None, nw=40, x0=0.5, verbose=logger.NOTE): """ RPA correlation and total energy Args: - Lpq : density fitting 3-center integral in MO basis. - nw : number of frequency point on imaginary axis. - x0: scaling factor for frequency grid. + cderi_ov: + Array-like object, Cholesky decomposed ERI in OV subspace. + nw: + number of frequency point on imaginary axis. + x0: + scaling factor for frequency grid. Returns: - e_tot : RPA total energy - e_hf : EXX energy - e_corr : RPA correlation energy + e_tot: + RPA total energy + e_hf: + EXX energy + e_corr: + RPA correlation energy """ mf = rpa._scf + # only support frozen core if rpa.frozen is not None: assert isinstance(rpa.frozen, int) - assert rpa.frozen < rpa.nocc + assert rpa.frozen < np.min(rpa.nocc) + + # Get orbital number + with_df = rpa.with_df + naux = with_df.get_naoaux() + norb = rpa._scf.mol.nao_nr() + + # Get memory information + max_memory = max(0, rpa.max_memory * 0.9 - lib.current_memory()[0]) + if max_memory < naux ** 2 / 1e6: + logger.warn( + rpa, 'Memory may not be enough! 
Available memory %d MB < %d MB', + max_memory, naux ** 2 / 1e6 + ) + + # AO -> MO transformation + if cderi_ov is None: + blksize = int(max_memory * 1e6 / (8 * norb ** 2)) + blksize = min(naux, blksize) + blksize = max(1, blksize) + + # logger.debug(rpa, 'cderi memory: %6d MB', naux * norb ** 2 * 8 / 1e6) + # logger.debug(rpa, 'cderi_ov memory: %6d MB', naux * nocc * nvir * 8 / 1e6) + logger.debug(rpa, 'ao2mo blksize = %d', blksize) + if blksize == 1: + logger.warn(rpa, 'Memory too small for ao2mo! blksize = 1') + + cderi_ov = rpa.ao2mo(mo_coeff, blksize=blksize) + + # Compute exact exchange energy (EXX) + e_hf = _ene_hf(mf, with_df) + e_ov = rpa.make_e_ov(mo_energy) - if Lpq is None: - Lpq = rpa.ao2mo(mo_coeff) + # Compute RPA correlation energy + e_corr = 0.0 - # Grids for integration on imaginary axis - freqs, wts = _get_scaled_legendre_roots(nw, x0) + # Determine block size for dielectric matrix + blksize = int(max_memory * 1e6 / 8 / naux) + blksize = max(blksize, 1) - # Compute HF exchange energy (EXX) - dm = mf.make_rdm1() - rhf = scf.RHF(rpa.mol) - e_hf = rhf.energy_elec(dm=dm)[0] - e_hf += mf.energy_nuc() + if blksize == 1: + logger.warn(rpa, 'Memory too small for dielectric matrix! 
blksize = 1') - # Compute RPA correlation energy - e_corr = get_rpa_ecorr(rpa, Lpq, freqs, wts) + logger.debug(rpa, 'diel blksize = %d', blksize) + + # Grids for numerical integration on imaginary axis + for omega, weigh in zip(*_get_scaled_legendre_roots(nw, x0)): + diel = rpa.make_dielectric_matrix(omega, e_ov, cderi_ov, blksize=blksize) + factor = weigh / (2.0 * np.pi) + e_corr += factor * np.log(np.linalg.det(np.eye(naux) - diel)) + e_corr += factor * np.trace(diel) # Compute total energy e_tot = e_hf + e_corr - logger.debug(rpa, ' RPA total energy = %s', e_tot) logger.debug(rpa, ' EXX energy = %s, RPA corr energy = %s', e_hf, e_corr) return e_tot, e_hf, e_corr -def get_rpa_ecorr(rpa, Lpq, freqs, wts): - """ - Compute RPA correlation energy +# **************************************************************************** +# frequency integral quadrature, legendre, clenshaw_curtis +# **************************************************************************** + +def make_dielectric_matrix(omega, e_ov, cderi_ov, blksize=None): """ - mo_energy = _mo_energy_without_core(rpa, rpa._scf.mo_energy) - nocc = rpa.nocc - nw = len(freqs) - naux = Lpq.shape[0] + Compute dielectric matrix at a given frequency omega - if (mo_energy[nocc] - mo_energy[nocc-1]) < 1e-3: - logger.warn(rpa, 'Current RPA code not well-defined for degeneracy!') + Args: + omega : float, frequency + e_ov : 1D array (nocc * nvir), orbital energy differences + cderi_ov : 2D array (naux, nocc * nvir), Cholesky decomposed ERI + in OV subspace. - e_corr = 0. 
- for w in range(nw): - Pi = get_rho_response(freqs[w], mo_energy, Lpq[:, :nocc, nocc:]) - ec_w = np.log(np.linalg.det(np.eye(naux) - Pi)) - ec_w += np.trace(Pi) - e_corr += 1./(2.*np.pi) * ec_w * wts[w] + Returns: + diel : 2D array (naux, naux), dielectric matrix + """ + assert blksize is not None - return e_corr + naux, nov = cderi_ov.shape -def get_rho_response(omega, mo_energy, Lpq): - """ - Compute density response function in auxiliary basis at freq iw. - """ - naux, nocc, nvir = Lpq.shape - eia = mo_energy[:nocc, None] - mo_energy[None, nocc:] - eia = eia / (omega**2 + eia * eia) - # Response from both spin-up and spin-down density - Pia = Lpq * (eia * 4.0) - Pi = einsum('Pia, Qia -> PQ', Pia, Lpq) - return Pi + chi0 = (2.0 * e_ov / (omega ** 2 + e_ov ** 2)).ravel() + diel = np.zeros((naux, naux)) -# **************************************************************************** -# frequency integral quadrature, legendre, clenshaw_curtis -# **************************************************************************** + for s in [slice(*x) for x in lib.prange(0, nov, blksize)]: + v_ov = cderi_ov[:, s] + diel += np.dot(v_ov * chi0[s], v_ov.T) + v_ov = None + + return diel def _get_scaled_legendre_roots(nw, x0=0.5): """ @@ -138,6 +174,7 @@ def _get_clenshaw_curtis_roots(nw): """ Clenshaw-Curtis qaudrature on [0,inf) Ref: J. Chem. Phys. 
132, 234114 (2010) + Returns: freqs : 1D array wts : 1D array @@ -149,22 +186,44 @@ def _get_clenshaw_curtis_roots(nw): t = (w + 1.0) / nw * np.pi * 0.5 freqs[w] = a / np.tan(t) if w != nw - 1: - wts[w] = a*np.pi * 0.5 / nw / (np.sin(t)**2) + wts[w] = a * np.pi * 0.50 / nw / (np.sin(t)**2) else: - wts[w] = a*np.pi * 0.25 / nw / (np.sin(t)**2) + wts[w] = a * np.pi * 0.25 / nw / (np.sin(t)**2) return freqs[::-1], wts[::-1] +def _ene_hf(mf=None, with_df=None): + """ + Args: + mf: converged mean-field object, can be either HF or KS + with_df: density fitting object + + Returns: + e_hf: float, total Hartree-Fock energy + """ + assert mf.converged + hf_obj = mf if not isinstance(mf, scf.hf.KohnShamDFT) else mf.to_hf() + + if not getattr(hf_obj, 'with_df', None): + hf_obj = hf_obj.density_fit(with_df=with_df) + dm = hf_obj.make_rdm1() + + e_hf = hf_obj.energy_elec(dm=dm)[0] + e_hf += hf_obj.energy_nuc() + return e_hf + def _mo_energy_without_core(rpa, mo_energy): return mo_energy[get_frozen_mask(rpa)] def _mo_without_core(rpa, mo): return mo[:,get_frozen_mask(rpa)] -class RPA(lib.StreamObject): +class DirectRPA(lib.StreamObject): _keys = { 'mol', 'frozen', - 'with_df', 'mo_energy', 'mo_coeff', 'mo_occ', 'e_corr', 'e_hf', 'e_tot', + 'with_df', 'mo_energy', + 'mo_coeff', 'mo_occ', + 'e_corr', 'e_hf', 'e_tot', } def __init__(self, mf, frozen=None, auxbasis=None): @@ -185,8 +244,8 @@ def __init__(self, mf, frozen=None, auxbasis=None): else: self.with_df.auxbasis = df.make_auxbasis(mf.mol, mp2fit=True) -################################################## -# don't modify the following attributes, they are not input options + ################################################## + # don't modify the following attributes, they are not input options self._nocc = None self._nmo = None self.mo_energy = mf.mo_energy @@ -226,50 +285,111 @@ def nmo(self, n): get_nmo = get_nmo get_frozen_mask = get_frozen_mask - def kernel(self, mo_energy=None, mo_coeff=None, Lpq=None, nw=40, x0=0.5): + def 
kernel(self, mo_energy=None, mo_coeff=None, cderi_ov=None, nw=40, x0=0.5): """ - Args: - mo_energy : 1D array (nmo), mean-field mo energy - mo_coeff : 2D array (nmo, nmo), mean-field mo coefficient - Lpq : 3D array (naux, nmo, nmo), 3-index ERI - nw: integer, grid number - x0: real, scaling factor for frequency grid - - Returns: - self.e_tot : RPA total eenrgy - self.e_hf : EXX energy - self.e_corr : RPA correlation energy + The kernel function for direct RPA """ - if mo_coeff is None: - mo_coeff = _mo_without_core(self, self._scf.mo_coeff) - if mo_energy is None: - mo_energy = _mo_energy_without_core(self, self._scf.mo_energy) cput0 = (logger.process_clock(), logger.perf_counter()) + self.dump_flags() - self.e_tot, self.e_hf, self.e_corr = \ - kernel(self, mo_energy, mo_coeff, Lpq=Lpq, nw=nw, x0=x0, verbose=self.verbose) + res = kernel( + self, mo_energy, mo_coeff, + cderi_ov=cderi_ov, nw=nw, x0=x0, + verbose=self.verbose + ) + self.e_tot, self.e_hf, self.e_corr = res logger.timer(self, 'RPA', *cput0) return self.e_corr - def ao2mo(self, mo_coeff=None): + def make_e_ov(self, mo_energy=None): + """ + Compute orbital energy differences + """ + if mo_energy is None: + mo_energy = _mo_energy_without_core(self, self.mo_energy) + + nocc = self.nocc + e_ov = (mo_energy[:nocc, None] - mo_energy[None, nocc:]).ravel() + + gap = (-e_ov.max(), ) + logger.info(self, 'Lowest orbital energy difference: % 6.4e', np.min(gap)) + + if (np.min(gap) < 1e-3): + logger.warn(rpa, 'RPA code not well-defined for degenerate systems!') + logger.warn(rpa, 'Lowest orbital energy difference: % 6.4e', np.min(gap)) + + return e_ov + + def make_dielectric_matrix(self, omega, e_ov=None, cderi_ov=None, blksize=None): + """ + Args: + omega : float, frequency + e_ov : 1D array (nocc * nvir), orbital energy differences + mo_coeff : (nao, nmo), mean-field mo coefficient + cderi_ov : (naux, nocc, nvir), Cholesky decomposed ERI in OV subspace. 
+ + Returns: + diel : 2D array (naux, naux), dielectric matrix + """ + + assert e_ov is not None + assert cderi_ov is not None + + blksize = blksize or max(e_ov.size) + + diel = 2.0 * make_dielectric_matrix( + omega, e_ov, + cderi_ov if isinstance(cderi_ov, np.ndarray) else cderi_ov["cderi_ov"], + blksize=blksize + ) + + return diel + + def ao2mo(self, mo_coeff=None, blksize=None): if mo_coeff is None: - mo_coeff = self.mo_coeff - nmo = self.nmo + mo_coeff = _mo_without_core(self, self.mo_coeff) + + nocc = self.nocc + norb = self.nmo + nvir = norb - nocc naux = self.with_df.get_naoaux() - mem_incore = (2 * nmo**2*naux) * 8 / 1e6 - mem_now = lib.current_memory()[0] - - mo = np.asarray(mo_coeff, order='F') - ijslice = (0, nmo, 0, nmo) - Lpq = None - if (mem_incore + mem_now < 0.99 * self.max_memory) or self.mol.incore_anyway: - Lpq = _ao2mo.nr_e2(self.with_df._cderi, mo, ijslice, aosym='s2', out=Lpq) - return Lpq.reshape(naux, nmo, nmo) + sov = (0, nocc, nocc, norb) # slice for OV block + + blksize = naux if blksize is None else blksize + cderi_ov = None + + cput0 = (logger.process_clock(), logger.perf_counter()) + if blksize >= naux or self.mol.incore_anyway: + assert isinstance(self.with_df._cderi, np.ndarray) + cderi_ov = _ao2mo.nr_e2( + self.with_df._cderi, mo_coeff, + sov, aosym='s2', out=cderi_ov + ) + logger.timer(self, 'incore ao2mo', *cput0) + else: - logger.warn(self, 'Memory may not be enough!') - raise NotImplementedError + fswap = lib.H5TmpFile() + fswap.create_dataset('cderi_ov', (naux, nocc * nvir)) + + q0 = 0 + for cderi in self.with_df.loop(blksize=blksize): + q1 = q0 + cderi.shape[0] + v_ov = _ao2mo.nr_e2( + cderi, mo_coeff, + sov, aosym='s2' + ) + fswap['cderi_ov'][q0:q1] = v_ov + v_ov = None + q0 = q1 + + logger.timer(self, 'outcore ao2mo', *cput0) + cderi_ov = fswap + + return cderi_ov + +RPA = dRPA = DirectRPA if __name__ == '__main__': from pyscf import gto, dft @@ -279,7 +399,7 @@ def ao2mo(self, mo_coeff=None): [8 , (0. , 0. , 0.)], [1 , (0. 
, -0.7571 , 0.5861)], [1 , (0. , 0.7571 , 0.5861)]] - mol.basis = 'def2-svp' + mol.basis = 'def2svp' mol.build() mf = dft.RKS(mol) @@ -287,7 +407,28 @@ def ao2mo(self, mo_coeff=None): mf.kernel() rpa = RPA(mf) - rpa.kernel() + rpa.verbose = 6 + + nocc = rpa.nocc + nvir = rpa.nmo - nocc + norb = rpa.nmo + e_ov = - (rpa.mo_energy[:nocc, None] - rpa.mo_energy[None, nocc:]).ravel() + v_ov = rpa.ao2mo(rpa.mo_coeff, blksize=1) + e_corr_0 = rpa.kernel(cderi_ov=v_ov) + print ('RPA e_tot, e_hf, e_corr = ', rpa.e_tot, rpa.e_hf, rpa.e_corr) - assert (abs(rpa.e_corr- -0.30783004035780076) < 1e-6) - assert (abs(rpa.e_tot- -76.26428191794182) < 1e-6) + assert (abs(rpa.e_corr - -0.307830040357800) < 1e-6) + assert (abs(rpa.e_tot - -76.26651423730257) < 1e-6) + + # Another implementation of direct RPA N^6 + v_ov = np.array(v_ov["cderi_ov"]) + a = e_ov * np.eye(nocc * nvir) + 2 * np.dot(v_ov.T, v_ov) + b = 2 * np.dot(v_ov.T, v_ov) + apb = a + b + amb = a - b + c = np.dot(amb, apb) + e_corr_1 = 0.5 * np.trace( + scipy.linalg.sqrtm(c) - a + ) + + assert abs(e_corr_0 - e_corr_1) < 1e-8 diff --git a/pyscf/gw/test/test_gw.py b/pyscf/gw/test/test_gw.py index 0f405f9ac2..b5c366061c 100644 --- a/pyscf/gw/test/test_gw.py +++ b/pyscf/gw/test/test_gw.py @@ -78,9 +78,8 @@ def test_gw_exact(self): def test_rpa(self): rpa_obj = rpa.RPA(mf, frozen=0) rpa_obj.kernel() - self.assertAlmostEqual(rpa_obj.e_tot, -76.26428191794182, 6) - self.assertAlmostEqual(rpa_obj.e_corr, -0.30783004035780076, 6) - + self.assertAlmostEqual(rpa_obj.e_tot, -76.26651423730257, 6) + self.assertAlmostEqual(rpa_obj.e_corr, -0.307830040357800, 6) if __name__ == "__main__": print("Full Tests for GW") diff --git a/pyscf/gw/test/test_ugw.py b/pyscf/gw/test/test_ugw.py index 2234ced2d6..81ada072ff 100644 --- a/pyscf/gw/test/test_ugw.py +++ b/pyscf/gw/test/test_ugw.py @@ -41,8 +41,9 @@ def test_gwac_pade(self): def test_rpa(self): rpa_obj = urpa.URPA(mf, frozen=0) rpa_obj.kernel() - self.assertAlmostEqual(rpa_obj.e_tot, 
-74.98258098665727, 6) - self.assertAlmostEqual(rpa_obj.e_corr, -0.18821540003542925, 6) + + self.assertAlmostEqual(rpa_obj.e_tot, -74.98369614250653, 6) + self.assertAlmostEqual(rpa_obj.e_corr, -0.1882153685614803, 6) if __name__ == "__main__": diff --git a/pyscf/gw/urpa.py b/pyscf/gw/urpa.py index 5d338a7638..576de8c30e 100755 --- a/pyscf/gw/urpa.py +++ b/pyscf/gw/urpa.py @@ -32,95 +32,10 @@ from pyscf.ao2mo import _ao2mo from pyscf import df, scf from pyscf.mp.ump2 import get_nocc, get_nmo, get_frozen_mask -from pyscf.gw.rpa import RPA, _get_scaled_legendre_roots -einsum = lib.einsum +import pyscf.gw.rpa -# **************************************************************************** -# core routines, kernel, rpa_ecorr, rho_response -# **************************************************************************** - -def kernel(rpa, mo_energy, mo_coeff, Lpq=None, nw=40, x0=0.5, verbose=logger.NOTE): - """ - RPA correlation and total energy - - Args: - Lpq : density fitting 3-center integral in MO basis. - nw : number of frequency point on imaginary axis. - x0: scaling factor for frequency grid. 
- - Returns: - e_tot : RPA total energy - e_hf : EXX energy - e_corr : RPA correlation energy - """ - mf = rpa._scf - # only support frozen core - if rpa.frozen is not None: - assert isinstance(rpa.frozen, int) - assert (rpa.frozen < rpa.nocc[0] and rpa.frozen < rpa.nocc[1]) - - if Lpq is None: - Lpq = rpa.ao2mo(mo_coeff) - - # Grids for integration on imaginary axis - freqs, wts = _get_scaled_legendre_roots(nw, x0) - - # Compute HF exchange energy (EXX) - dm = mf.make_rdm1() - uhf = scf.UHF(rpa.mol) - e_hf = uhf.energy_elec(dm=dm)[0] - e_hf += mf.energy_nuc() - - # Compute RPA correlation energy - e_corr = get_rpa_ecorr(rpa, Lpq, freqs, wts) - - # Compute total energy - e_tot = e_hf + e_corr - - logger.debug(rpa, ' RPA total energy = %s', e_tot) - logger.debug(rpa, ' EXX energy = %s, RPA corr energy = %s', e_hf, e_corr) - - return e_tot, e_hf, e_corr - -def get_rpa_ecorr(rpa, Lpq, freqs, wts): - """ - Compute RPA correlation energy - """ - mo_energy = _mo_energy_without_core(rpa, rpa._scf.mo_energy) - nocca, noccb = rpa.nocc - nw = len(freqs) - naux = Lpq[0].shape[0] - - homo = max(mo_energy[0][nocca-1], mo_energy[1][noccb-1]) - lumo = min(mo_energy[0][nocca], mo_energy[1][noccb]) - if (lumo-homo) < 1e-3: - logger.warn(rpa, 'Current RPA code not well-defined for degeneracy!') - - e_corr = 0. 
- for w in range(nw): - Pi = get_rho_response(freqs[w], mo_energy, Lpq[0,:,:nocca,nocca:], Lpq[1,:,:noccb,noccb:]) - ec_w = np.log(np.linalg.det(np.eye(naux) - Pi)) - ec_w += np.trace(Pi) - e_corr += 1./(2.*np.pi) * ec_w * wts[w] - - return e_corr - -def get_rho_response(omega, mo_energy, Lpqa, Lpqb): - ''' - Compute density response function in auxiliary basis at freq iw - ''' - naux, nocca, nvira = Lpqa.shape - naux, noccb, nvirb = Lpqb.shape - eia_a = mo_energy[0,:nocca,None] - mo_energy[0,None,nocca:] - eia_b = mo_energy[1,:noccb,None] - mo_energy[1,None,noccb:] - eia_a = eia_a / (omega**2 + eia_a*eia_a) - eia_b = eia_b / (omega**2 + eia_b*eia_b) - Pia_a = Lpqa * (eia_a * 2.0) - Pia_b = Lpqb * (eia_b * 2.0) - # Response from both spin-up and spin-down density - Pi = einsum('Pia, Qia -> PQ', Pia_a, Lpqa) + einsum('Pia, Qia -> PQ', Pia_b, Lpqb) - return Pi +einsum = lib.einsum def _mo_energy_without_core(rpa, mo_energy): moidx = get_frozen_mask(rpa) @@ -132,8 +47,7 @@ def _mo_without_core(rpa, mo): mo = (mo[0][:,moidx[0]], mo[1][:,moidx[1]]) return np.asarray(mo) -class URPA(RPA): - +class URPA(pyscf.gw.rpa.RPA): def dump_flags(self): log = logger.Logger(self.stdout, self.verbose) log.info('') @@ -153,57 +67,144 @@ def dump_flags(self): get_nmo = get_nmo get_frozen_mask = get_frozen_mask - def kernel(self, mo_energy=None, mo_coeff=None, Lpq=None, nw=40, x0=0.5): + def make_e_ov(self, mo_energy=None): + """ + Compute orbital energy differences + """ + if mo_energy is None: + mo_energy = _mo_energy_without_core(self, self.mo_energy) + + nocc_a, nocc_b = self.nocc + e_ov_a = (mo_energy[0][:nocc_a, None] - mo_energy[0][None, nocc_a:]).ravel() + e_ov_b = (mo_energy[1][:nocc_b, None] - mo_energy[1][None, nocc_b:]).ravel() + + gap = (-e_ov_a.max(), -e_ov_b.max()) + logger.info(self, 'Lowest orbital energy difference: (% 6.4e, % 6.4e)', gap[0], gap[1]) + + if (np.min(gap) < 1e-3): + logger.warn(self, 'RPA code not well-defined for degenerate systems!') + 
logger.warn(self, 'Lowest orbital energy difference: % 6.4e', np.min(gap)) + + return e_ov_a, e_ov_b + + def make_dielectric_matrix(self, omega, e_ov=None, cderi_ov=None, blksize=None): """ Args: - mo_energy : 2D array (2, nmo), mean-field mo energy - mo_coeff : 3D array (2, nmo, nmo), mean-field mo coefficient - Lpq : 4D array (2, naux, nmo, nmo), 3-index ERI - nw: integer, grid number - x0: real, scaling factor for frequency grid + omega : float, frequency + mo_energy : (2, nmo), mean-field mo energy + mo_coeff : (2, nao, nmo), mean-field mo coefficient + cderi_ov : (2, naux, nocc, nvir), Cholesky decomposed ERI in OV subspace. Returns: - self.e_tot : RPA total eenrgy - self.e_hf : EXX energy - self.e_corr : RPA correlation energy + diel : 2D array (naux, naux), dielectric matrix """ + assert cderi_ov is not None + assert e_ov is not None + + naux = self.with_df.get_naoaux() + blksize = blksize or max(e_ov[0].size, e_ov[1].size) + + diel = np.zeros((naux, naux)) + for s, e_ov_s in enumerate((e_ov[0], e_ov[1])): + cderi_ov_s = cderi_ov[s] if isinstance(cderi_ov, tuple) else cderi_ov["cderi_ov_%d" % s] + diel += pyscf.gw.rpa.make_dielectric_matrix(omega, e_ov_s, cderi_ov_s, blksize=blksize) + + return diel + + def ao2mo(self, mo_coeff=None, blksize=None): if mo_coeff is None: - mo_coeff = _mo_without_core(self, self._scf.mo_coeff) - if mo_energy is None: - mo_energy = _mo_energy_without_core(self, self._scf.mo_energy) + mo_coeff = _mo_without_core(self, self.mo_coeff) - cput0 = (logger.process_clock(), logger.perf_counter()) - self.dump_flags() - self.e_tot, self.e_hf, self.e_corr = \ - kernel(self, mo_energy, mo_coeff, Lpq=Lpq, nw=nw, x0=x0, verbose=self.verbose) + mo_coeff_a = mo_coeff[0] + mo_coeff_b = mo_coeff[1] - logger.timer(self, 'RPA', *cput0) - return self.e_corr + nocc_a, nocc_b = self.nocc + norb_a, norb_b = self.nmo + nvir_a, nvir_b = norb_a - nocc_a, norb_b - nocc_b - def ao2mo(self, mo_coeff=None): - nmoa, nmob = self.nmo - nao = 
self.mo_coeff[0].shape[0] naux = self.with_df.get_naoaux() - mem_incore = (nmoa**2*naux + nmob**2*naux + nao**2*naux) * 8/1e6 - mem_now = lib.current_memory()[0] - - moa = np.asarray(mo_coeff[0], order='F') - mob = np.asarray(mo_coeff[1], order='F') - ijslicea = (0, nmoa, 0, nmoa) - ijsliceb = (0, nmob, 0, nmob) - Lpqa = None - Lpqb = None - if (mem_incore + mem_now < 0.99*self.max_memory) or self.mol.incore_anyway: - Lpqa = _ao2mo.nr_e2(self.with_df._cderi, moa, ijslicea, aosym='s2', out=Lpqa) - Lpqb = _ao2mo.nr_e2(self.with_df._cderi, mob, ijsliceb, aosym='s2', out=Lpqb) - return np.asarray((Lpqa.reshape(naux,nmoa,nmoa),Lpqb.reshape(naux,nmob,nmob))) + sov_a = (0, nocc_a, nocc_a, norb_a) + sov_b = (0, nocc_b, nocc_b, norb_b) + + blksize = naux if blksize is None else blksize + cderi_ov = None + cderi_ov_a = None + cderi_ov_b = None + + cput0 = (logger.process_clock(), logger.perf_counter()) + if blksize >= naux or self.mol.incore_anyway: + assert isinstance(self.with_df._cderi, np.ndarray) + cderi_ov_a = _ao2mo.nr_e2( + self.with_df._cderi, mo_coeff_a, + sov_a, aosym='s2', out=cderi_ov_a + ) + + cderi_ov_b = _ao2mo.nr_e2( + self.with_df._cderi, mo_coeff_b, + sov_b, aosym='s2', out=cderi_ov_b + ) + cderi_ov = (cderi_ov_a, cderi_ov_b) + + logger.timer(self, 'incore ao2mo', *cput0) + else: - logger.warn(self, 'Memory may not be enough!') - raise NotImplementedError + fswap = lib.H5TmpFile() + fswap.create_dataset('cderi_ov_0', (naux, nocc_a * nvir_a), 'f8') + fswap.create_dataset('cderi_ov_1', (naux, nocc_b * nvir_b), 'f8') + + q0 = 0 + for cderi in self.with_df.loop(blksize=blksize): + q1 = q0 + cderi.shape[0] + + v_ov_a = _ao2mo.nr_e2( + cderi, mo_coeff_a, + sov_a, aosym='s2' + ) + fswap['cderi_ov_0'][q0:q1] = v_ov_a + v_ov_a = None + + v_ov_b = _ao2mo.nr_e2( + cderi, mo_coeff_b, + sov_b, aosym='s2' + ) + fswap['cderi_ov_1'][q0:q1] = v_ov_b + v_ov_b = None + + q0 = q1 + + logger.timer(self, 'outcore ao2mo', *cput0) + + cderi_ov = fswap + + return cderi_ov if 
__name__ == '__main__': from pyscf import gto, dft + # Closed-shell unrestricted RPA + mol = gto.Mole() + mol.verbose = 4 + mol.atom = [ + [8 , (0. , 0. , 0.)], + [1 , (0. , -0.7571 , 0.5861)], + [1 , (0. , 0.7571 , 0.5861)]] + mol.basis = 'def2svp' + mol.build() + + mf = dft.UKS(mol) + mf.xc = 'pbe' + mf.kernel() + + # Shall be identical to the restricted RPA result + rpa = URPA(mf) + rpa.max_memory = 0 + rpa.verbose = 5 + rpa.kernel() + print ('RPA e_tot, e_hf, e_corr = ', rpa.e_tot, rpa.e_hf, rpa.e_corr) + assert (abs(rpa.e_corr - -0.307830040357800) < 1e-6) + assert (abs(rpa.e_tot - -76.26651423730257) < 1e-6) + + # Open-shell RPA mol = gto.Mole() mol.verbose = 4 mol.atom = 'F 0 0 0' @@ -213,10 +214,17 @@ def ao2mo(self, mo_coeff=None): mf = dft.UKS(mol) mf.xc = 'pbe0' + mf.max_memory = 0 mf.kernel() rpa = URPA(mf) + rpa.max_memory = 0 + rpa.verbose = 5 rpa.kernel() print ('RPA e_tot, e_hf, e_corr = ', rpa.e_tot, rpa.e_hf, rpa.e_corr) - assert (abs(rpa.e_corr- -0.20980646878974454) < 1e-6) - assert (abs(rpa.e_tot- -99.49292565821425) < 1e-6) + assert (abs(rpa.e_corr - -0.20980646878974454) < 1e-6) + assert (abs(rpa.e_tot - -99.49455969299747) < 1e-6) + + + + From 10f89c376371ed55b99075cd77ba209181629ca2 Mon Sep 17 00:00:00 2001 From: Xiaojie Wu Date: Mon, 25 Mar 2024 16:31:40 -0700 Subject: [PATCH 24/44] DFT wrapper supporting conventional DFT methods (#2084) * added solvent models * add example for RHF * cleanup variables * support casci casscf and ccsd * uncomment unittests * change example name * update reset * for flake8 * fixed a bug in soscf/newton_ah.py * updated for recent master changes * remove whitespace * remove whitespace * native support dispersion correction * fixed a bug in pcm * Update hf.py * move dispersion to addons * remove disp in RKS * call get_dispersion in kernels * added unit test for d4 * fixed dispersion correction in testing h2o * updated ci * skip unittest if dftd3 or dftd4 is missing * new high level dft wrapper * clean up & move 
dispersion to energy_tot * remove disp from dft _keys * fixed import dftd3_xc_map * fixed typo * added a wrapper for is_nlc * replaced more ni.libxc.is_nlc with ni.is_nlc * added wb97x-d3 as a special case * added disp_3body * removesuffix -> replace * address qiming's comments * add dft_parser && change back to libxc.is_nlc * fixed unit test * disable wb97x-d3, wb97x-d * skip dftd3 for py3.12 & skip wb97x-d3 * Update dft_parser; Add tests for dft_parser * Add warning messages * Update test_h2o.py --------- Co-authored-by: Qiming Sun --- examples/dft/00-simple_dft.py | 10 ++++ pyscf/dft/dft_parser.py | 87 +++++++++++++++++++++++++++++++++++ pyscf/dft/libxc.py | 12 ++++- pyscf/dft/rks.py | 5 +- pyscf/dft/test/test_h2o.py | 37 ++++++++++++++- pyscf/dft/test/test_libxc.py | 14 ++++++ pyscf/dft/xcfun.py | 10 +++- pyscf/grad/dispersion.py | 43 ++++++++++------- pyscf/hessian/dispersion.py | 52 ++++++++++++--------- pyscf/scf/dispersion.py | 35 ++++++++------ pyscf/scf/hf.py | 17 ++++--- 11 files changed, 255 insertions(+), 67 deletions(-) create mode 100644 pyscf/dft/dft_parser.py diff --git a/examples/dft/00-simple_dft.py b/examples/dft/00-simple_dft.py index 7ba632fc6f..a84f56110e 100644 --- a/examples/dft/00-simple_dft.py +++ b/examples/dft/00-simple_dft.py @@ -34,3 +34,13 @@ # Orbital energies, Mulliken population etc. mf.analyze() + +# shorten dft names +mf = mol.KS(xc='b3lyp-d3bj') +#mf = mol.KS(xc='b3lyp-d3zero') +#mf = mol.KS(xc='b3lyp-d3bj2b') +#mf = mol.KS(xc='b3lyp-d3bjatm') +#mf = mol.KS(xc='b3lyp-d4') +#mf = mol.KS(xc='wb97x-v') +#mf = mol.KS(xc='wb97m-d3bj) +#mf = mol.KS(xc='wb97x-d3) diff --git a/pyscf/dft/dft_parser.py b/pyscf/dft/dft_parser.py new file mode 100644 index 0000000000..d9fdd05cb9 --- /dev/null +++ b/pyscf/dft/dft_parser.py @@ -0,0 +1,87 @@ + +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' +unified dft parser for coordinating dft protocols with +1. xc functionals +2. dispersion corrections / nonlocal correction +3. GTO basis (TODO) +4. geometrical counterpoise (gCP) correction (TODO) +''' + +from functools import lru_cache +import warnings + +# supported dispersion corrections +DISP_VERSIONS = ['d3bj', 'd3zero', 'd3bjm', 'd3zerom', 'd3op', 'd4'] + +@lru_cache(128) +def parse_dft(dft_method): + ''' conventional dft method -> + (xc, enable nlc, (xc for dftd3, dispersion version, with 3-body dispersion)) + ''' + if not isinstance(dft_method, str): + return dft_method, None, (dft_method, None, False) + method_lower = dft_method.lower() + + # special cases: + # - wb97x-d is not supported yet + # - wb97*-d3bj is wb97*-v with d3bj + # - wb97x-d3 is not supported yet + # - 3c method is not supported yet + + if method_lower == 'wb97x-d': + raise NotImplementedError('wb97x-d is not supported yet.') + + if method_lower == 'wb97m-d3bj': + return 'wb97m-v', False, ('wb97m', 'd3bj', False) + if method_lower == 'b97m-d3bj': + return 'b97m-v', False, ('b97m', 'd3bj', False) + if method_lower == 'wb97x-d3bj': + return 'wb97x-v', False, ('wb97x', 'd3bj', False) + + # J. Chem. Theory Comput. 
2013, 9, 1, 263-272 + if method_lower in ['wb97x-d3']: + raise NotImplementedError('wb97x-d3 is not supported yet.') + + if method_lower.endswith('-3c'): + raise NotImplementedError('*-3c methods are not supported yet.') + + xc = dft_method + disp = None + for d in DISP_VERSIONS: + if method_lower.endswith(d): + disp = d + with_3body = False + xc = method_lower.replace(f'-{d}','') + elif method_lower.endswith(d+'2b'): + disp = d + with_3body = False + xc = method_lower.replace(f'-{d}2b', '') + elif method_lower.endswith(d+'atm'): + disp = d + with_3body = True + xc = method_lower.replace(f'-{d}atm', '') + + if disp is not None: + if xc in ('b97m', 'wb97m'): + warnings.warn( + f'{dft_method} is not a well-defined functional. ' + 'The XC part is changed to {xc}-v') + return xc+'-v', False, (xc, disp, with_3body) + else: + return xc, None, (xc, disp, with_3body) + + return xc, None, (xc, None, False) diff --git a/pyscf/dft/libxc.py b/pyscf/dft/libxc.py index 8e6d51267f..6b2a841a52 100644 --- a/pyscf/dft/libxc.py +++ b/pyscf/dft/libxc.py @@ -30,7 +30,7 @@ from functools import lru_cache from pyscf import lib from pyscf.dft.xc.utils import remove_dup, format_xc_code -from pyscf.dft import xc_deriv +from pyscf.dft import xc_deriv, dft_parser from pyscf import __config__ _itrf = lib.load_library('libxc_itrf') @@ -922,6 +922,10 @@ def is_gga(xc_code): @lru_cache(100) def is_nlc(xc_code): + enable_nlc = dft_parser.parse_dft(xc_code)[1] + if enable_nlc is False: + return False + # identify nlc by xc_code itself if enable_nlc is None if isinstance(xc_code, str): if xc_code.isdigit(): return _itrf.LIBXC_is_nlc(ctypes.c_int(int(xc_code))) @@ -1087,6 +1091,7 @@ def parse_xc(description): decoded XC description, with the data structure (hybrid, alpha, omega), ((libxc-Id, fac), (libxc-Id, fac), ...) 
''' # noqa: E501 + hyb = [0, 0, 0] # hybrid, alpha, omega (== SR_HF, LR_HF, omega) if description is None: return tuple(hyb), () @@ -1105,6 +1110,8 @@ def parse_xc(description): 'To restore the VWN5 definition, you can put the setting ' '"B3LYP_WITH_VWN5 = True" in pyscf_conf.py') + description = dft_parser.parse_dft(description)[0] + def assign_omega(omega, hyb_or_sr, lr=0): if hyb[2] == omega or omega == 0: hyb[0] += hyb_or_sr @@ -1231,6 +1238,8 @@ def possible_c_for(key): parse_token(token, 'C') else: for token in description.replace('-', '+-').replace(';+', ';').split('+'): + # dftd3 cannot be used in a custom xc description + assert '-d3' not in token parse_token(token, 'compound XC', search_xc_alias=True) if hyb[2] == 0: # No omega is assigned. LR_HF is 0 for normal Coulomb operator hyb[1] = 0 @@ -1251,6 +1260,7 @@ def possible_c_for(key): 'WB97X-D' : 'WB97X_D', 'WB97X-V' : 'WB97X_V', 'WB97M-V' : 'WB97M_V', + 'WB97X-D3' : 'WB97X_D3', 'B97M-V' : 'B97M_V', 'M05-2X' : 'M05_2X', 'M06-L' : 'M06_L', diff --git a/pyscf/dft/rks.py b/pyscf/dft/rks.py index 2a5d82c2d1..5972f98e20 100644 --- a/pyscf/dft/rks.py +++ b/pyscf/dft/rks.py @@ -260,7 +260,6 @@ def define_xc_(ks, description, xctype='LDA', hyb=0, rsh=(0,0,0)): ks._numint = libxc.define_xc_(ks._numint, description, xctype, hyb, rsh) return ks - def _dft_common_init_(mf, xc='LDA,VWN'): raise DeprecationWarning @@ -321,10 +320,12 @@ class KohnShamDFT: -76.415443079840458 ''' - _keys = {'xc', 'nlc', 'grids', 'nlcgrids', 'small_rho_cutoff'} + _keys = {'xc', 'nlc', 'grids', 'disp', 'disp_with_3body', 'nlcgrids', 'small_rho_cutoff'} def __init__(self, xc='LDA,VWN'): self.xc = xc + self.disp = None + self.disp_with_3body = None self.nlc = '' self.grids = gen_grid.Grids(self.mol) self.grids.level = getattr( diff --git a/pyscf/dft/test/test_h2o.py b/pyscf/dft/test/test_h2o.py index 7eee60fe8c..1c1c1ececf 100644 --- a/pyscf/dft/test/test_h2o.py +++ b/pyscf/dft/test/test_h2o.py @@ -501,6 +501,41 @@ def 
test_nr_uks_vv10_high_cost(self): method.nlcgrids.atom_grid = {"H": (40, 110), "O": (40, 110),} self.assertAlmostEqual(method.scf(), -76.352381513158718, 8) + @unittest.skipIf('dftd3' not in sys.modules, "requires the dftd3 library") + def test_dft_parser(self): + from pyscf.scf import dispersion + method = dft.RKS(h2o, xc='wb97m-d3bj') + e_disp = dispersion.get_dispersion(method) + self.assertAlmostEqual(e_disp, -0.0007551366628786623, 9) + assert method._numint.libxc.is_nlc(method.xc) == False + fn_facs = method._numint.libxc.parse_xc(method.xc) + assert fn_facs[1][0][0] == 531 + + method = dft.RKS(h2o, xc='wb97x-d3bj') + e_disp = dispersion.get_dispersion(method) + self.assertAlmostEqual(e_disp, -0.0005697890844546384, 9) + assert method._numint.libxc.is_nlc(method.xc) == False + fn_facs = method._numint.libxc.parse_xc(method.xc) + assert fn_facs[1][0][0] == 466 + + method = dft.RKS(h2o, xc='b3lyp-d3bj') + e_disp = dispersion.get_dispersion(method) + self.assertAlmostEqual(e_disp, -0.0005738788210828446, 9) + fn_facs = method._numint.libxc.parse_xc(method.xc) + assert fn_facs[1][0][0] == 402 + + method = dft.RKS(h2o, xc='b3lyp-d3bjm2b') + e_disp = dispersion.get_dispersion(method) + self.assertAlmostEqual(e_disp, -0.0006949127588605776, 9) + + method = dft.RKS(h2o, xc='b3lyp-d3bjmatm') + e_disp = dispersion.get_dispersion(method) + self.assertAlmostEqual(e_disp, -0.0006949125270554931, 9) + + method = dft.UKS(h2o, xc='b3lyp-d3bjmatm') + e_disp = dispersion.get_dispersion(method) + self.assertAlmostEqual(e_disp, -0.0006949125270554931, 9) + def test_camb3lyp_rsh_omega(self): mf = dft.RKS(h2o) mf.grids.atom_grid = {"H": (50, 194), "O": (50, 194),} @@ -524,7 +559,7 @@ def test_dispersion(self): mf.xc = 'B3LYP' mf.disp = 'd3bj' mf.run(xc='B3LYP') - self.assertAlmostEqual(mf.e_tot, -76.38945547396322, 9) + self.assertAlmostEqual(mf.e_tot, -76.38552043811778, 9) def test_reset(self): mf = dft.RKS(h2o).newton() diff --git a/pyscf/dft/test/test_libxc.py 
b/pyscf/dft/test/test_libxc.py index 4bacb402e5..f4223fdeec 100644 --- a/pyscf/dft/test/test_libxc.py +++ b/pyscf/dft/test/test_libxc.py @@ -341,6 +341,20 @@ def test_m06(self): self.assertAlmostEqual(abs(numpy.hstack([fxc[i] for i in [0,1,2,4,6,9]])-fxc_ref).max(), 0, 7) self.assertAlmostEqual(abs(numpy.hstack([kxc[i] for i in [0,1,2,3,5,7,10,12,15,19]])-kxc_ref).max(), 0, 6) + def test_dft_parser(self): + from pyscf.dft.dft_parser import parse_dft + self.assertEqual(parse_dft('wb97m-d3bj'), ('wb97m-v', False, ('wb97m', 'd3bj', False))) + self.assertEqual(dft.libxc.parse_xc('wb97m-d3bj')[1][0][0], 531) + self.assertTrue(not dft.libxc.is_nlc('wb97m-d3bj')) + + self.assertEqual(parse_dft('wb97-d3zerom'), ('wb97', None, ('wb97', 'd3zerom', False))) + self.assertTrue(not dft.libxc.is_nlc('wb97-d3zerom')) + + self.assertEqual(parse_dft('wb97m-d3bjatm'), ('wb97m-v', False, ('wb97m', 'd3bj', True))) + self.assertTrue(not dft.libxc.is_nlc('wb97m-d3bjatm')) + + self.assertEqual(parse_dft('wb97x-d3zero2b'), ('wb97x', None, ('wb97x', 'd3zero', False))) + self.assertTrue(not dft.libxc.is_nlc('wb97x-d3zero2b')) if __name__ == "__main__": print("Test libxc") diff --git a/pyscf/dft/xcfun.py b/pyscf/dft/xcfun.py index c49c5e9ab8..d830c87b64 100644 --- a/pyscf/dft/xcfun.py +++ b/pyscf/dft/xcfun.py @@ -28,7 +28,7 @@ import numpy from pyscf import lib from pyscf.dft.xc.utils import remove_dup, format_xc_code -from pyscf.dft import xc_deriv +from pyscf.dft import xc_deriv, dft_parser from pyscf import __config__ _itrf = lib.load_library('libxcfun_itrf') @@ -318,6 +318,9 @@ def is_gga(xc_code): VV10_XC.update([(5000+i, VV10_XC[key]) for i, key in enumerate(VV10_XC)]) def is_nlc(xc_code): + enable_nlc = dft_parser.parse_dft(xc_code)[1] + if enable_nlc is False: + return False fn_facs = parse_xc(xc_code)[1] return any(xid >= 5000 for xid, c in fn_facs) @@ -420,6 +423,8 @@ def parse_xc(description): elif not isinstance(description, str): #isinstance(description, (tuple,list)): return 
parse_xc('%s,%s' % tuple(description)) + description = dft_parser.parse_dft(description)[0] + def assign_omega(omega, hyb_or_sr, lr=0): if hyb[2] == omega or omega == 0: hyb[0] += hyb_or_sr @@ -430,6 +435,7 @@ def assign_omega(omega, hyb_or_sr, lr=0): hyb[2] = omega else: raise ValueError('Different values of omega found for RSH functionals') + fn_facs = [] def parse_token(token, suffix, search_xc_alias=False): if token: @@ -503,6 +509,8 @@ def parse_token(token, suffix, search_xc_alias=False): parse_token(token, 'C') else: for token in description.replace('-', '+-').replace(';+', ';').split('+'): + # dftd3 cannot be used in a custom xc description + assert '-d3' not in token parse_token(token, 'XC', search_xc_alias=True) if hyb[2] == 0: # No omega is assigned. LR_HF is 0 for normal Coulomb operator hyb[1] = 0 diff --git a/pyscf/grad/dispersion.py b/pyscf/grad/dispersion.py index b4865db0a8..2cd5fe705c 100644 --- a/pyscf/grad/dispersion.py +++ b/pyscf/grad/dispersion.py @@ -21,36 +21,45 @@ ''' import numpy -from pyscf.scf.hf import KohnShamDFT +from pyscf.dft.rks import KohnShamDFT +from pyscf.dft import dft_parser -def get_dispersion(mf_grad, disp_version=None): +def get_dispersion(mf_grad, disp_version=None, with_3body=False): '''gradient of dispersion correction for RHF/RKS''' + mf = mf_grad.base + mol = mf.mol + if isinstance(mf, KohnShamDFT): + method = mf.xc + else: + method = 'hf' + method, disp, with_3body = dft_parser.parse_dft(method)[2] + + # priority: args > mf.disp > dft_parser if disp_version is None: - disp_version = mf_grad.base.disp - mol = mf_grad.base.mol - disp_version = mf_grad.base.disp + disp_version = disp + # dispersion version can be customized via mf.disp + if hasattr(mf, 'disp') and mf.disp is not None: + disp_version = mf.disp + if disp_version is None: return numpy.zeros([mol.natm,3]) - if isinstance(mf_grad.base, KohnShamDFT): - method = mf_grad.base.xc - else: - method = 'hf' + # 3-body contribution can be disabled with 
mf.disp_with_3body + if hasattr(mf, 'disp_with_3body') and mf.disp_with_3body is not None: + with_3body = mf.disp_with_3body if disp_version[:2].upper() == 'D3': # raised error in SCF module, assuming dftd3 installed import dftd3.pyscf as disp - d3 = disp.DFTD3Dispersion(mol, xc=method, version=disp_version) + d3 = disp.DFTD3Dispersion(mol, xc=method, version=disp_version, atm=with_3body) _, g_d3 = d3.kernel() return g_d3 elif disp_version[:2].upper() == 'D4': - from pyscf.data.elements import charge - atoms = numpy.array([ charge(a[0]) for a in mol._atom]) - coords = mol.atom_coords() - from dftd4.interface import DampingParam, DispersionModel - model = DispersionModel(atoms, coords) - res = model.get_dispersion(DampingParam(method=method), grad=True) - return res.get("gradient") + # raised error in SCF module, assuming dftd3 installed + import dftd4.pyscf as disp + d4 = disp.DFTD4Dispersion(mol, xc=method, atm=with_3body) + _, g_d4 = d4.kernel() + return g_d4 else: raise RuntimeError(f'dispersion correction: {disp_version} is not supported.') diff --git a/pyscf/hessian/dispersion.py b/pyscf/hessian/dispersion.py index 8751da7e1c..728b01e2cf 100644 --- a/pyscf/hessian/dispersion.py +++ b/pyscf/hessian/dispersion.py @@ -22,21 +22,33 @@ import numpy -from pyscf.scf.hf import KohnShamDFT +from pyscf.dft.rks import KohnShamDFT +from pyscf.dft import dft_parser -def get_dispersion(hessobj, disp_version=None): +def get_dispersion(hessobj, disp_version=None, with_3body=False): + mf = hessobj.base + mol = mf.mol + if isinstance(mf, KohnShamDFT): + method = mf.xc + else: + method = 'hf' + method, disp, with_3body = dft_parser.parse_dft(method)[2] + + # priority: args > mf.disp > dft_parser if disp_version is None: - disp_version = hessobj.base.disp - mol = hessobj.base.mol + disp_version = disp + # dispersion version can be customized via mf.disp + if hasattr(mf, 'disp') and mf.disp is not None: + disp_version = mf.disp + natm = mol.natm - mf = hessobj.base h_disp = 
numpy.zeros([natm,natm,3,3]) if disp_version is None: return h_disp - if isinstance(hessobj.base, KohnShamDFT): - method = hessobj.base.xc - else: - method = 'hf' + + # 3-body contribution can be disabled with mf.disp_with_3body + if hasattr(mf, 'disp_with_3body') and mf.disp_with_3body is not None: + with_3body = mf.disp_with_3body if mf.disp[:2].upper() == 'D3': import dftd3.pyscf as disp @@ -47,12 +59,12 @@ def get_dispersion(hessobj, disp_version=None): for j in range(3): coords[i,j] += eps mol.set_geom_(coords, unit='Bohr') - d3 = disp.DFTD3Dispersion(mol, xc=method, version=mf.disp) + d3 = disp.DFTD3Dispersion(mol, xc=method, version=mf.disp, atm=with_3body) _, g1 = d3.kernel() coords[i,j] -= 2.0*eps mol.set_geom_(coords, unit='Bohr') - d3 = disp.DFTD3Dispersion(mol, xc=method, version=mf.disp) + d3 = disp.DFTD3Dispersion(mol, xc=method, version=mf.disp, atm=with_3body) _, g2 = d3.kernel() coords[i,j] += eps @@ -60,27 +72,21 @@ def get_dispersion(hessobj, disp_version=None): return h_disp elif mf.disp[:2].upper() == 'D4': - from pyscf.data.elements import charge - atoms = numpy.array([ charge(a[0]) for a in mol._atom]) - coords = mol.atom_coords() - natm = mol.natm - from dftd4.interface import DampingParam, DispersionModel - params = DampingParam(method=method) + import dftd4.pyscf as disp + coords = hessobj.mol.atom_coords() mol = mol.copy() eps = 1e-5 for i in range(natm): for j in range(3): coords[i,j] += eps mol.set_geom_(coords, unit='Bohr') - model = DispersionModel(atoms, coords) - res = model.get_dispersion(params, grad=True) - g1 = res.get("gradient") + d4 = disp.DFTD4Dispersion(mol, xc=method, atm=with_3body) + _, g1 = d4.kernel() coords[i,j] -= 2.0*eps mol.set_geom_(coords, unit='Bohr') - model = DispersionModel(atoms, coords) - res = model.get_dispersion(params, grad=True) - g2 = res.get("gradient") + d4 = disp.DFTD4Dispersion(mol, xc=method, atm=with_3body) + _, g2 = d4.kernel() coords[i,j] += eps h_disp[i,:,j,:] = (g1 - g2)/(2.0*eps) diff --git 
a/pyscf/scf/dispersion.py b/pyscf/scf/dispersion.py index 607ab5c9c7..94e9018ff7 100644 --- a/pyscf/scf/dispersion.py +++ b/pyscf/scf/dispersion.py @@ -20,20 +20,29 @@ dispersion correction for HF and DFT ''' - -import numpy -from pyscf.scf.hf import KohnShamDFT +from pyscf.dft.rks import KohnShamDFT +from pyscf.dft import dft_parser def get_dispersion(mf, disp_version=None): - if disp_version is None: - disp_version = mf.disp mol = mf.mol - if disp_version is None: - return 0.0 if isinstance(mf, KohnShamDFT): method = mf.xc else: method = 'hf' + method, disp, with_3body = dft_parser.parse_dft(method)[2] + + # priority: args > mf.disp > dft_parser + if disp_version is None: + disp_version = disp + # dispersion version can be customized via mf.disp + if hasattr(mf, 'disp') and mf.disp is not None: + disp_version = mf.disp + if disp_version is None: + return 0.0 + + # 3-body contribution can be disabled with mf.disp_with_3body + if hasattr(mf, 'disp_with_3body') and mf.disp_with_3body is not None: + with_3body = mf.disp_with_3body # for dftd3 if disp_version[:2].upper() == 'D3': @@ -47,18 +56,15 @@ def get_dispersion(mf, disp_version=None): pip3 install dftd3 \n \ **************************************") - d3 = disp.DFTD3Dispersion(mol, xc=method, version=disp_version) + d3 = disp.DFTD3Dispersion(mol, xc=method, version=disp_version, atm=with_3body) e_d3, _ = d3.kernel() mf.scf_summary['dispersion'] = e_d3 return e_d3 # for dftd4 elif disp_version[:2].upper() == 'D4': - from pyscf.data.elements import charge - atoms = numpy.array([ charge(a[0]) for a in mol._atom]) - coords = mol.atom_coords() try: - from dftd4.interface import DampingParam, DispersionModel + import dftd4.pyscf as disp except ImportError: raise ImportError("\n \ cannot find dftd4 in the current environment. 
\n \ @@ -67,9 +73,8 @@ def get_dispersion(mf, disp_version=None): pip3 install dftd4 \n \ ***************************************") - model = DispersionModel(atoms, coords) - res = model.get_dispersion(DampingParam(method=method), grad=False) - e_d4 = res.get("energy") + d4 = disp.DFTD4Dispersion(mol, xc=method, atm=with_3body) + e_d4, _ = d4.kernel() mf.scf_summary['dispersion'] = e_d4 return e_d4 else: diff --git a/pyscf/scf/hf.py b/pyscf/scf/hf.py index 7119745708..209fa26afe 100644 --- a/pyscf/scf/hf.py +++ b/pyscf/scf/hf.py @@ -229,12 +229,6 @@ def kernel(mf, conv_tol=1e-10, conv_tol_grad=None, if dump_chk: mf.dump_chk(locals()) - #FIX DISP!! - if mf.disp is not None: - e_disp = mf.get_dispersion() - mf.scf_summary['dispersion'] = e_disp - e_tot += e_disp - logger.timer(mf, 'scf_cycle', *cput0) # A post-processing hook before return mf.post_kernel(locals()) @@ -298,7 +292,16 @@ def energy_tot(mf, dm=None, h1e=None, vhf=None): ''' nuc = mf.energy_nuc() e_tot = mf.energy_elec(dm, h1e, vhf)[0] + nuc + if mf.disp is not None: + if 'dispersion' in mf.scf_summary: + e_tot += mf.scf_summary['dispersion'] + else: + e_disp = mf.get_dispersion() + mf.scf_summary['dispersion'] = e_disp + e_tot += e_disp + mf.scf_summary['nuc'] = nuc.real + return e_tot @@ -1531,7 +1534,7 @@ class SCF(lib.StreamObject): 'diis_file', 'diis_space_rollback', 'damp', 'level_shift', 'direct_scf', 'direct_scf_tol', 'conv_check', 'callback', 'mol', 'chkfile', 'mo_energy', 'mo_coeff', 'mo_occ', - 'e_tot', 'converged', 'scf_summary', 'opt', 'disp', + 'e_tot', 'converged', 'scf_summary', 'opt', 'disp', 'disp_with_3body', } def __init__(self, mol): From 0a17e425e3c3dc28cfba0b54613194909db20548 Mon Sep 17 00:00:00 2001 From: xubwa Date: Wed, 27 Mar 2024 16:03:37 -0400 Subject: [PATCH 25/44] fix dipole moment in sfx2c1e --- pyscf/x2c/sfx2c1e.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyscf/x2c/sfx2c1e.py b/pyscf/x2c/sfx2c1e.py index c3fde295c1..2cc4d61f72 100644 --- 
a/pyscf/x2c/sfx2c1e.py +++ b/pyscf/x2c/sfx2c1e.py @@ -128,8 +128,9 @@ def dip_moment(self, mol=None, dm=None, unit='Debye', verbose=logger.NOTE, if picture_change: xmol = self.with_x2c.get_xmol()[0] nao = xmol.nao - prp = xmol.intor_symmetric('int1e_sprsp').reshape(3,4,nao,nao)[:,0] - ao_dip = self.with_x2c.picture_change(('int1e_r', prp)) + prp = xmol.intor_symmetric('int1e_sprsp').reshape(3,4,nao,nao)[:,3] + c1 = 0.5/lib.param.LIGHT_SPEED + ao_dip = self.with_x2c.picture_change(('int1e_r', prp*c1**2)) else: ao_dip = mol.intor_symmetric('int1e_r') From 175f787372d55ef3090e46798ca1374ac00c575d Mon Sep 17 00:00:00 2001 From: Xiaojie Wu Date: Wed, 3 Apr 2024 15:14:12 -0700 Subject: [PATCH 26/44] debug and simplify to_gpu (#2149) * support and simplify to_gpu * remove comments * flake8 --- pyscf/cc/ccsd.py | 11 +++++++++-- pyscf/df/grad/rhf.py | 2 -- pyscf/df/grad/rks.py | 2 -- pyscf/df/grad/uhf.py | 2 -- pyscf/df/grad/uks.py | 2 -- pyscf/df/hessian/rhf.py | 1 - pyscf/df/hessian/rks.py | 1 - pyscf/df/hessian/uhf.py | 1 - pyscf/df/hessian/uks.py | 1 - pyscf/dft/numint.py | 8 ++++++-- pyscf/grad/rhf.py | 10 +++++++--- pyscf/grad/rks.py | 2 -- pyscf/grad/uhf.py | 2 -- pyscf/grad/uks.py | 2 -- pyscf/hessian/rhf.py | 10 +++++++--- pyscf/hessian/rks.py | 1 - pyscf/hessian/uhf.py | 2 -- pyscf/hessian/uks.py | 1 - pyscf/lib/misc.py | 4 +++- pyscf/mp/dfmp2.py | 2 -- pyscf/mp/mp2.py | 9 ++++++++- 21 files changed, 40 insertions(+), 36 deletions(-) diff --git a/pyscf/cc/ccsd.py b/pyscf/cc/ccsd.py index 65ee49486f..67aab80aae 100644 --- a/pyscf/cc/ccsd.py +++ b/pyscf/cc/ccsd.py @@ -1240,6 +1240,15 @@ def density_fit(self, auxbasis=None, with_df=None): def nuc_grad_method(self): raise NotImplementedError + # to_gpu can be reused only when __init__ still takes mf + def to_gpu(self): + mf = self.base.to_gpu() + from importlib import import_module + mod = import_module(self.__module__.replace('pyscf', 'gpu4pyscf')) + cls = getattr(mod, self.__class__.__name__) + obj = cls(mf) + 
return obj + class CCSD(CCSDBase): __doc__ = CCSDBase.__doc__ @@ -1365,8 +1374,6 @@ def get_d2_diagnostic(self, t2=None): if t2 is None: t2 = self.t2 return get_d2_diagnostic(t2) - to_gpu = lib.to_gpu - CC = RCCSD = CCSD diff --git a/pyscf/df/grad/rhf.py b/pyscf/df/grad/rhf.py index 91ccd3e543..cfa31375eb 100644 --- a/pyscf/df/grad/rhf.py +++ b/pyscf/df/grad/rhf.py @@ -523,6 +523,4 @@ def extra_force(self, atom_id, envs): else: return 0 - to_gpu = lib.to_gpu - Grad = Gradients diff --git a/pyscf/df/grad/rks.py b/pyscf/df/grad/rks.py index a53d3d0ae6..adfc7b7080 100644 --- a/pyscf/df/grad/rks.py +++ b/pyscf/df/grad/rks.py @@ -123,6 +123,4 @@ def extra_force(self, atom_id, envs): e1 += envs['vhf'].aux[atom_id] return e1 - to_gpu = lib.to_gpu - Grad = Gradients diff --git a/pyscf/df/grad/uhf.py b/pyscf/df/grad/uhf.py index af2e048591..0eec773b0d 100644 --- a/pyscf/df/grad/uhf.py +++ b/pyscf/df/grad/uhf.py @@ -60,6 +60,4 @@ def extra_force(self, atom_id, envs): else: return 0 - to_gpu = lib.to_gpu - Grad = Gradients diff --git a/pyscf/df/grad/uks.py b/pyscf/df/grad/uks.py index 9fa6f5cdf5..e6de663a95 100644 --- a/pyscf/df/grad/uks.py +++ b/pyscf/df/grad/uks.py @@ -124,6 +124,4 @@ def extra_force(self, atom_id, envs): e1 += envs['vhf'].aux[atom_id] return e1 - to_gpu = lib.to_gpu - Grad = Gradients diff --git a/pyscf/df/hessian/rhf.py b/pyscf/df/hessian/rhf.py index 95bc7f9dcf..d06fa9f473 100644 --- a/pyscf/df/hessian/rhf.py +++ b/pyscf/df/hessian/rhf.py @@ -480,7 +480,6 @@ def __init__(self, mf): partial_hess_elec = partial_hess_elec make_h1 = make_h1 - to_gpu = lib.to_gpu #TODO: Insert into DF class diff --git a/pyscf/df/hessian/rks.py b/pyscf/df/hessian/rks.py index 30b59fc8d1..74c1bdd6c9 100644 --- a/pyscf/df/hessian/rks.py +++ b/pyscf/df/hessian/rks.py @@ -126,7 +126,6 @@ def __init__(self, mf): partial_hess_elec = partial_hess_elec make_h1 = make_h1 - to_gpu = lib.to_gpu if __name__ == '__main__': diff --git a/pyscf/df/hessian/uhf.py b/pyscf/df/hessian/uhf.py 
index b252f99953..5cb20240f8 100644 --- a/pyscf/df/hessian/uhf.py +++ b/pyscf/df/hessian/uhf.py @@ -531,7 +531,6 @@ def __init__(self, mf): partial_hess_elec = partial_hess_elec make_h1 = make_h1 - to_gpu = lib.to_gpu #TODO: Insert into DF class diff --git a/pyscf/df/hessian/uks.py b/pyscf/df/hessian/uks.py index 92624a128f..1afa995973 100644 --- a/pyscf/df/hessian/uks.py +++ b/pyscf/df/hessian/uks.py @@ -139,7 +139,6 @@ def __init__(self, mf): partial_hess_elec = partial_hess_elec make_h1 = make_h1 - to_gpu = lib.to_gpu if __name__ == '__main__': diff --git a/pyscf/dft/numint.py b/pyscf/dft/numint.py index 1716042118..c448241587 100644 --- a/pyscf/dft/numint.py +++ b/pyscf/dft/numint.py @@ -2865,8 +2865,12 @@ def make_rho(idm, ao, sindex, xctype): with_lapl) return make_rho, ndms, nao - to_gpu = lib.to_gpu - + def to_gpu(self): + try: + from gpu4pyscf.dft import numint + return numint.NumInt() + except ImportError: + raise ImportError('Cannot find GPU4PySCF') _NumInt = NumInt diff --git a/pyscf/grad/rhf.py b/pyscf/grad/rhf.py index e45e3b5ed9..c89da0e1f6 100644 --- a/pyscf/grad/rhf.py +++ b/pyscf/grad/rhf.py @@ -440,8 +440,14 @@ def _tag_rdm1 (self, dm, mo_coeff, mo_occ): to be split into alpha,beta in DF-ROHF subclass''' return lib.tag_array (dm, mo_coeff=mo_coeff, mo_occ=mo_occ) + # to_gpu can be reused only when __init__ still takes mf def to_gpu(self): - raise NotImplementedError + mf = self.base.to_gpu() + from importlib import import_module + mod = import_module(self.__module__.replace('pyscf', 'gpu4pyscf')) + cls = getattr(mod, self.__class__.__name__) + obj = cls(mf) + return obj # export the symbol GradientsMixin for backward compatibility. # GradientsMixin should be dropped in the future. 
@@ -463,8 +469,6 @@ def make_rdm1e(self, mo_energy=None, mo_coeff=None, mo_occ=None): grad_elec = grad_elec - to_gpu = lib.to_gpu - Grad = Gradients from pyscf import scf diff --git a/pyscf/grad/rks.py b/pyscf/grad/rks.py index bb0198140d..7aee5fadfc 100644 --- a/pyscf/grad/rks.py +++ b/pyscf/grad/rks.py @@ -622,8 +622,6 @@ def extra_force(self, atom_id, envs): else: return 0 - to_gpu = lib.to_gpu - Grad = Gradients from pyscf import dft diff --git a/pyscf/grad/uhf.py b/pyscf/grad/uhf.py index 949b7abf44..0f46458975 100644 --- a/pyscf/grad/uhf.py +++ b/pyscf/grad/uhf.py @@ -106,8 +106,6 @@ def make_rdm1e(self, mo_energy=None, mo_coeff=None, mo_occ=None): grad_elec = grad_elec - to_gpu = lib.to_gpu - Grad = Gradients from pyscf import scf diff --git a/pyscf/grad/uks.py b/pyscf/grad/uks.py index cc73955814..644ab01584 100644 --- a/pyscf/grad/uks.py +++ b/pyscf/grad/uks.py @@ -275,8 +275,6 @@ def extra_force(self, atom_id, envs): else: return 0 - to_gpu = lib.to_gpu - Grad = Gradients from pyscf import dft diff --git a/pyscf/hessian/rhf.py b/pyscf/hessian/rhf.py index 9736eeff5c..f40df1bbe8 100644 --- a/pyscf/hessian/rhf.py +++ b/pyscf/hessian/rhf.py @@ -599,16 +599,20 @@ def kernel(self, mo_energy=None, mo_coeff=None, mo_occ=None, atmlst=None): gen_hop = gen_hop + # to_gpu can be reused only when __init__ still takes mf def to_gpu(self): - raise NotImplementedError - + mf = self.base.to_gpu() + from importlib import import_module + mod = import_module(self.__module__.replace('pyscf', 'gpu4pyscf')) + cls = getattr(mod, self.__class__.__name__) + obj = cls(mf) + return obj class Hessian(HessianBase): partial_hess_elec = partial_hess_elec hess_elec = hess_elec make_h1 = make_h1 - to_gpu = lib.to_gpu # Inject to RHF class from pyscf import scf diff --git a/pyscf/hessian/rks.py b/pyscf/hessian/rks.py index 31ee13115c..497b053383 100644 --- a/pyscf/hessian/rks.py +++ b/pyscf/hessian/rks.py @@ -590,7 +590,6 @@ def __init__(self, mf): partial_hess_elec = partial_hess_elec 
hess_elec = rhf_hess.hess_elec make_h1 = make_h1 - to_gpu = lib.to_gpu from pyscf import dft dft.rks.RKS.Hessian = dft.rks_symm.RKS.Hessian = lib.class_as_method(Hessian) diff --git a/pyscf/hessian/uhf.py b/pyscf/hessian/uhf.py index 4b97fbf6f9..eabbe231d2 100644 --- a/pyscf/hessian/uhf.py +++ b/pyscf/hessian/uhf.py @@ -454,8 +454,6 @@ def solve_mo1(self, mo_energy, mo_coeff, mo_occ, h1ao_or_chkfile, fx, atmlst, max_memory, verbose, max_cycle=self.max_cycle, level_shift=self.level_shift) - to_gpu = lib.to_gpu - from pyscf import scf scf.uhf.UHF.Hessian = lib.class_as_method(Hessian) diff --git a/pyscf/hessian/uks.py b/pyscf/hessian/uks.py index 2c3941452b..17a6693461 100644 --- a/pyscf/hessian/uks.py +++ b/pyscf/hessian/uks.py @@ -667,7 +667,6 @@ def __init__(self, mf): solve_mo1 = uhf_hess.Hessian.solve_mo1 partial_hess_elec = partial_hess_elec make_h1 = make_h1 - to_gpu = lib.to_gpu from pyscf import dft dft.uks.UKS.Hessian = dft.uks_symm.UKS.Hessian = lib.class_as_method(Hessian) diff --git a/pyscf/lib/misc.py b/pyscf/lib/misc.py index 9819f2a978..65219dacaf 100644 --- a/pyscf/lib/misc.py +++ b/pyscf/lib/misc.py @@ -1379,6 +1379,7 @@ def __getattr__(self, key): # Then class can be instantiated easily like cls(omniobj) in the following # to_gpu function. omniobj = _OmniObject() +omniobj._built = True omniobj.mol = omniobj omniobj._scf = omniobj omniobj.base = omniobj @@ -1408,7 +1409,7 @@ def to_gpu(method, out=None): if isinstance(method, (SinglePointScanner, GradScanner)): method = method.undo_scanner() - import import_module + from importlib import import_module mod = import_module(method.__module__.replace('pyscf', 'gpu4pyscf')) cls = getattr(mod, method.__class__.__name__) # A temporary GPU instance. 
This ensures to initialize private @@ -1430,3 +1431,4 @@ def to_gpu(method, out=None): setattr(out, key, val) out.reset() return out + diff --git a/pyscf/mp/dfmp2.py b/pyscf/mp/dfmp2.py index 6522b21c19..d8cffdd57a 100644 --- a/pyscf/mp/dfmp2.py +++ b/pyscf/mp/dfmp2.py @@ -140,8 +140,6 @@ def update_amps(self, t2, eris): def init_amps(self, mo_energy=None, mo_coeff=None, eris=None, with_t2=WITH_T2): return kernel(self, mo_energy, mo_coeff, eris, with_t2) - to_gpu = lib.to_gpu - MP2 = DFMP2 from pyscf import scf diff --git a/pyscf/mp/mp2.py b/pyscf/mp/mp2.py index 9d1dc431a5..e2d9caaa09 100644 --- a/pyscf/mp/mp2.py +++ b/pyscf/mp/mp2.py @@ -649,7 +649,14 @@ def nuc_grad_method(self): def init_amps(self, mo_energy=None, mo_coeff=None, eris=None, with_t2=WITH_T2): return kernel(self, mo_energy, mo_coeff, eris, with_t2) - to_gpu = lib.to_gpu + # to_gpu can be reused only when __init__ still takes mf + def to_gpu(self): + mf = self._scf.to_gpu() + from importlib import import_module + mod = import_module(self.__module__.replace('pyscf', 'gpu4pyscf')) + cls = getattr(mod, self.__class__.__name__) + obj = cls(mf) + return obj RMP2 = MP2 From eafc3575234aca3832d270f4e1193bec2119d2b4 Mon Sep 17 00:00:00 2001 From: Hong-Zhou Ye Date: Wed, 3 Apr 2024 18:14:46 -0400 Subject: [PATCH 27/44] bug fix for RCCSD(T) with complex orbitals (#2141) * bug fix for vooo order * fix flake8 --------- Co-authored-by: hongzhouye <> --- pyscf/cc/ccsd_t.py | 2 +- pyscf/cc/ccsd_t_slow.py | 2 +- pyscf/cc/qcisd_t_slow.py | 2 +- pyscf/pbc/cc/test/test_rccsd_t_shift.py | 65 +++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 pyscf/pbc/cc/test/test_rccsd_t_shift.py diff --git a/pyscf/cc/ccsd_t.py b/pyscf/cc/ccsd_t.py index 52e0b972c8..4cb06abf84 100644 --- a/pyscf/cc/ccsd_t.py +++ b/pyscf/cc/ccsd_t.py @@ -176,7 +176,7 @@ def _sort_eri(mycc, eris, nocc, nvir, vvop, log): def _sort_t2_vooo_(mycc, orbsym, t1, t2, eris): assert (t2.flags.c_contiguous) - vooo = 
numpy.asarray(eris.ovoo).transpose(1,0,3,2).conj().copy() + vooo = numpy.asarray(eris.ovoo).transpose(1,0,2,3).conj().copy() nocc, nvir = t1.shape if mycc.mol.symmetry: orbsym = numpy.asarray(orbsym, dtype=numpy.int32) diff --git a/pyscf/cc/ccsd_t_slow.py b/pyscf/cc/ccsd_t_slow.py index 32d6d2960a..b22e36497c 100644 --- a/pyscf/cc/ccsd_t_slow.py +++ b/pyscf/cc/ccsd_t_slow.py @@ -45,7 +45,7 @@ def kernel(mycc, eris, t1=None, t2=None, verbose=logger.NOTE): eijk = lib.direct_sum('i,j,k->ijk', e_occ, e_occ, e_occ) eris_vvov = eris.get_ovvv().conj().transpose(1,3,0,2) - eris_vooo = numpy.asarray(eris.ovoo).conj().transpose(1,0,3,2) + eris_vooo = numpy.asarray(eris.ovoo).conj().transpose(1,0,2,3) eris_vvoo = numpy.asarray(eris.ovov).conj().transpose(1,3,0,2) fvo = eris.fock[nocc:,:nocc] def get_w(a, b, c): diff --git a/pyscf/cc/qcisd_t_slow.py b/pyscf/cc/qcisd_t_slow.py index 19a02779bc..8968449333 100644 --- a/pyscf/cc/qcisd_t_slow.py +++ b/pyscf/cc/qcisd_t_slow.py @@ -47,7 +47,7 @@ def kernel(mycc, eris, t1=None, t2=None, verbose=logger.NOTE): eijk = lib.direct_sum('i,j,k->ijk', e_occ, e_occ, e_occ) eris_vvov = eris.get_ovvv().conj().transpose(1,3,0,2) - eris_vooo = numpy.asarray(eris.ovoo).conj().transpose(1,0,3,2) + eris_vooo = numpy.asarray(eris.ovoo).conj().transpose(1,0,2,3) eris_vvoo = numpy.asarray(eris.ovov).conj().transpose(1,3,0,2) fvo = eris.fock[nocc:,:nocc] def get_w(a, b, c): diff --git a/pyscf/pbc/cc/test/test_rccsd_t_shift.py b/pyscf/pbc/cc/test/test_rccsd_t_shift.py new file mode 100644 index 0000000000..92178ab5ad --- /dev/null +++ b/pyscf/pbc/cc/test/test_rccsd_t_shift.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# Copyright 2014-2018 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Authors: Hong-Zhou Ye +# + +import unittest +import numpy as np + +from pyscf.pbc import gto, scf, cc +from pyscf.cc.ccsd_t import kernel as CCSD_T + + +def run_cell(cell, scaled_center): + kpt = cell.make_kpts([1,1,1], scaled_center=scaled_center)[0] + + mf = scf.RHF(cell, kpt=kpt).rs_density_fit() + mf.with_df.omega = 0.1 + mf.kernel() + + mcc = cc.RCCSD(mf) + eris = mcc.ao2mo() + mcc.kernel(eris=eris) + eccsd = mcc.e_corr + + et = CCSD_T(mcc, eris) + + return eccsd, et + + +class KnownValues(unittest.TestCase): + def test_water(self): + atom = ''' + O 0.00000 0.00000 0.11779 + H 0.00000 0.75545 -0.47116 + H 0.00000 -0.75545 -0.47116 + ''' + basis = 'gth-dzvp' + pseudo = 'gth-hf-rev' + a = np.eye(3) * 30 + cell = gto.M(atom=atom, basis=basis, a=a, pseudo=pseudo) + + eccsd_gamma, et_gamma = run_cell(cell, [0,0,0]) + self.assertAlmostEqual(eccsd_gamma, -0.2082317212, 8) + self.assertAlmostEqual(et_gamma , -0.0033716894, 8) + + eccsd_shifted, et_shifted = run_cell(cell, [0.1,0.1,0.1]) + self.assertAlmostEqual(eccsd_gamma, eccsd_shifted, 8) + self.assertAlmostEqual(et_gamma , et_shifted , 8) + +if __name__ == '__main__': + print("RCCSD(T) with shift k-point test") + unittest.main() From 9a152a9953f58bc632cb873bc6f9971e36216015 Mon Sep 17 00:00:00 2001 From: Michal Krompiec Date: Thu, 4 Apr 2024 15:28:07 +0100 Subject: [PATCH 28/44] Fix if mf.istype('UHF') for to_uhf() conversion --- pyscf/mp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscf/mp/__init__.py b/pyscf/mp/__init__.py index ae700bb3b0..3b084823c3 100644 --- 
a/pyscf/mp/__init__.py +++ b/pyscf/mp/__init__.py @@ -54,7 +54,7 @@ def RMP2(mf, frozen=None, mo_coeff=None, mo_occ=None): def UMP2(mf, frozen=None, mo_coeff=None, mo_occ=None): mf = mf.remove_soscf() - if mf.istype('UHF'): + if not mf.istype('UHF'): mf = mf.to_uhf() if getattr(mf, 'with_df', None): From 095130d0c0828adb58c9a5a40ede7ee95107cec7 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Mon, 11 Mar 2024 22:22:55 -0700 Subject: [PATCH 29/44] Solve conflicts between @property and __getattr__ --- pyscf/gto/mole.py | 13 ++++++------- pyscf/pbc/gto/cell.py | 15 +++++++-------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/pyscf/gto/mole.py b/pyscf/gto/mole.py index 9c0a2d2125..5f97ba0dc2 100644 --- a/pyscf/gto/mole.py +++ b/pyscf/gto/mole.py @@ -3709,12 +3709,11 @@ def __getattr__(self, key): from Mole object. ''' if key[0] == '_': # Skip private attributes and Python builtins - raise AttributeError('Mole object does not have attribute %s' % key) - elif key in ('_ipython_canary_method_should_not_exist_', - '_repr_mimebundle_'): - # https://github.com/mewwts/addict/issues/26 - # https://github.com/jupyter/notebook/issues/2014 - raise AttributeError(f'Mole object has no attribute {key}') + # https://bugs.python.org/issue45985 + # https://github.com/python/cpython/issues/103936 + # @property and __getattr__ conflicts. As a temporary fix, call + # object.__getattribute__ method to re-raise AttributeError + return object.__getattribute__(self, key) # Import all available modules. Some methods are registered to other # classes/modules when importing modules in __all__. 
@@ -3738,7 +3737,7 @@ def __getattr__(self, key): elif 'CI' in key or 'CC' in key or 'CAS' in key or 'MP' in key: mf = scf.HF(self) else: - raise AttributeError(f'Mole object has no attribute {key}') + return object.__getattribute__(self, key) method = getattr(mf, key) diff --git a/pyscf/pbc/gto/cell.py b/pyscf/pbc/gto/cell.py index 15b4fa26d9..713916526e 100644 --- a/pyscf/pbc/gto/cell.py +++ b/pyscf/pbc/gto/cell.py @@ -1157,12 +1157,11 @@ def __getattr__(self, key): from Cell object. ''' if key[0] == '_': # Skip private attributes and Python builtins - raise AttributeError('Cell object does not have attribute %s' % key) - elif key in ('_ipython_canary_method_should_not_exist_', - '_repr_mimebundle_'): - # https://github.com/mewwts/addict/issues/26 - # https://github.com/jupyter/notebook/issues/2014 - raise AttributeError(f'Cell object has no attribute {key}') + # https://bugs.python.org/issue45985 + # https://github.com/python/cpython/issues/103936 + # @property and __getattr__ conflicts. As a temporary fix, call + # object.__getattribute__ method to re-raise AttributeError + return object.__getattribute__(self, key) # Import all available modules. Some methods are registered to other # classes/modules when importing modules in __all__. 
@@ -1188,7 +1187,7 @@ def __getattr__(self, key): elif 'CI' in key or 'CC' in key or 'MP' in key: mf = scf.KHF(self) else: - raise AttributeError(f'Cell object has no attribute {key}') + return object.__getattribute__(self, key) # Remove prefix 'K' because methods are registered without the leading 'K' key = key[1:] else: @@ -1204,7 +1203,7 @@ def __getattr__(self, key): elif 'CI' in key or 'CC' in key or 'MP' in key: mf = scf.HF(self) else: - raise AttributeError(f'Cell object has no attribute {key}') + return object.__getattribute__(self, key) method = getattr(mf, key) From fdd4e487dafc605fbeaa4c63817138681c726ea1 Mon Sep 17 00:00:00 2001 From: chillenb Date: Tue, 9 Apr 2024 11:54:48 -0400 Subject: [PATCH 30/44] move static configurations from setup.py to pyproject.toml (#2144) * move testing options to pytest.ini * move static configurations from setup.py to pyproject.toml * Update CI release jobs --------- Co-authored-by: Qiming Sun --- .github/workflows/ci.yml | 2 +- .github/workflows/publish.yml | 11 ++++- .github/workflows/run_tests.sh | 4 +- NOTICE | 1 + conda/build.sh | 2 +- pyproject.toml | 68 +++++++++++++++++++++++++++++++ setup.cfg => pytest.ini | 8 +--- setup.py | 74 +++------------------------------- 8 files changed, 89 insertions(+), 81 deletions(-) create mode 100644 pyproject.toml rename setup.cfg => pytest.ini (80%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 81db232136..1da9486bb1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -72,7 +72,7 @@ jobs: echo 'pbc_tools_pbc_fft_engine = "NUMPY"' > .pyscf_conf.py && \ echo "dftd3_DFTD3PATH = './pyscf/lib/deps/lib'" >> .pyscf_conf.py && \ echo "scf_hf_SCF_mute_chkfile = True" >> .pyscf_conf.py && \ - ulimit -s 20000 && /opt/python/${{ matrix.pyver }}/bin/pytest pyscf/ --ignore=pyscf/adc --ignore=pyscf/pbc/df --ignore=pyscf/pbc/cc -s -c setup.cfg pyscf' + ulimit -s 20000 && /opt/python/${{ matrix.pyver }}/bin/pytest pyscf/ --ignore=pyscf/adc 
--ignore=pyscf/pbc/df --ignore=pyscf/pbc/cc -s -c pytest.ini pyscf' macos-build: runs-on: macos-latest diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index a792c40d0f..5f26d085da 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -16,7 +16,9 @@ jobs: - uses: actions/checkout@v3 - name: Build wheels run: | - docker run --rm -v ${{ github.workspace }}:/src/pyscf pyscf/pyscf-pypa-env:latest \ + docker run --rm -v ${{ github.workspace }}:/src/pyscf \ + -e CMAKE_BUILD_PARALLEL_LEVEL=4 \ + pyscf/pyscf-pypa-env:latest \ bash /src/pyscf/docker/pypa-env/build-wheels.sh - name: List available wheels run: | @@ -57,6 +59,7 @@ jobs: export src=${GITHUB_WORKSPACE:-/src/pyscf} && \ export dst=${GITHUB_WORKSPACE:-/src/pyscf}/linux-wheels && \ export CMAKE_CONFIGURE_ARGS="-DWITH_F12=OFF" && \ + export CMAKE_BUILD_PARALLEL_LEVEL=4 && \ mkdir -p /root/wheelhouse $src/linux-wheels && \ sed -i "/ if basename(fn) not in needed_libs:/s/basename.*libs/1/" /opt/_internal/pipx/venvs/auditwheel/lib/python*/site-packages/auditwheel/wheel_abi.py && \ /opt/python/${{ matrix.pyver }}/bin/pip wheel -v --no-deps --no-clean -w /root/wheelhouse $src && \ @@ -82,7 +85,8 @@ jobs: - uses: actions/checkout@v3 - name: Build sdist run: | - python3 setup.py sdist + pip install build + python3 -m build -s - name: List available sdist run: | ls ${{ github.workspace }}/dist @@ -108,6 +112,7 @@ jobs: CIBW_BUILD: cp311-macosx_x86_64 CIBW_BUILD_VERBOSITY: "1" CMAKE_CONFIGURE_ARGS: "-DWITH_F12=OFF" + CMAKE_BUILD_PARALLEL_LEVEL: "4" with: output-dir: mac-wheels - name: List available wheels @@ -133,6 +138,7 @@ jobs: # Cross-platform build for arm64 wheels on x86 platform CIBW_ARCHS_MACOS: "x86_64 universal2 arm64" CMAKE_CONFIGURE_ARGS: "-DWITH_F12=OFF" + CMAKE_BUILD_PARALLEL_LEVEL: "4" CMAKE_OSX_ARCHITECTURES: arm64 with: output-dir: mac-wheels @@ -161,6 +167,7 @@ jobs: - run: which python - name: Publish to conda run: | + export 
CMAKE_BUILD_PARALLEL_LEVEL=4 export ANACONDA_API_TOKEN=${{ secrets.ANACONDA_TOKEN }} conda install -y anaconda-client conda-build conda config --set anaconda_upload yes diff --git a/.github/workflows/run_tests.sh b/.github/workflows/run_tests.sh index ec53aa7d24..707313c379 100755 --- a/.github/workflows/run_tests.sh +++ b/.github/workflows/run_tests.sh @@ -10,8 +10,8 @@ echo "scf_hf_SCF_mute_chkfile = True" >> .pyscf_conf.py version=$(python -c 'import sys; print("{0}.{1}".format(*sys.version_info[:2]))') # pytest-cov on Python 3.12 consumes huge memory if [ "$RUNNER_OS" == "Linux" ] && [ $version != "3.12" ]; then - pytest pyscf/ -s -c setup.cfg \ + pytest pyscf/ -s -c pytest.ini \ --cov-report xml --cov-report term --cov-config .coveragerc --cov pyscf else - pytest pyscf/ -s -c setup.cfg pyscf + pytest pyscf/ -s -c pytest.ini pyscf fi diff --git a/NOTICE b/NOTICE index dc52a6294c..327f4732e1 100644 --- a/NOTICE +++ b/NOTICE @@ -106,6 +106,7 @@ Jiachen Li Felipe S. S. Schneider Aniruddha Seal Peter Reinholdt +Christopher Hillenbrand --- diff --git a/conda/build.sh b/conda/build.sh index 4fe6afe44b..c84e5f22f6 100755 --- a/conda/build.sh +++ b/conda/build.sh @@ -17,4 +17,4 @@ export CMAKE_CONFIGURE_ARGS="-DWITH_F12=OFF -DBLA_VENDOR=Intel10_64lp_seq" # env PYTHON not defined in certain conda-build version # $PYTHON -m pip install . -vv -pip install -v --prefix=$PREFIX . +MAKEFLAGS="-j4" pip install -v --prefix=$PREFIX . 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..7ba6770e34 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,68 @@ +[build-system] +requires = ["setuptools >= 61.0", "wheel"] +build-backend = "setuptools.build_meta" + + +[project] +name = "pyscf" +dynamic = ["version"] +description = "PySCF: Python-based Simulations of Chemistry Framework" +readme = "README.md" +classifiers = [ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Science/Research', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: C', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Topic :: Software Development', + 'Topic :: Scientific/Engineering', + 'Operating System :: POSIX', + 'Operating System :: Unix', + 'Operating System :: MacOS', +] + +maintainers = [{ name = "Qiming Sun", email = "osirpt.sun@gmail.com" }] + +authors = [{ name = "Qiming Sun", email = "osirpt.sun@gmail.com" }] + +license = { text = "Apache-2.0" } + +dependencies = [ + 'numpy>=1.13,!=1.16,!=1.17', + 'scipy!=1.5.0,!=1.5.1', + 'h5py>=2.7', + 'setuptools', +] + +[project.urls] +Homepage = "http://www.pyscf.org" +Repository = "https://github.com/pyscf/pyscf" +Documentation = "http://www.pyscf.org" + +[project.optional-dependencies] + +geomopt = ["pyberny>=0.6.2", "geometric>=0.9.7.2", "pyscf-qsdopt"] +doci = ["pyscf-doci"] +properties = ["pyscf-properties"] +semiempirical = ['pyscf-semiempirical'] +cppe = ["cppe"] +pyqmc = ["pyqmc"] +mcfun = ["mcfun>=0.2.1"] +bse = ["basis-set-exchange"] + +all = ["pyscf[geomopt,doci,properties,semiempirical,cppe,pyqmc,mcfun,bse]"] + +# extras which should not be installed by "all" components +cornell_shci = 
["pyscf-cornell-shci"] +nao = ["pyscf-nao"] +fciqmcscf = ["pyscf-fciqmc"] +tblis = ["pyscf-tblis"] +icmpspt = ["pyscf-icmpspt"] # broken +shciscf = ["pyscf-shciscf"] # broken diff --git a/setup.cfg b/pytest.ini similarity index 80% rename from setup.cfg rename to pytest.ini index dfd113a2dd..b4ab584dd6 100644 --- a/setup.cfg +++ b/pytest.ini @@ -1,9 +1,4 @@ -[egg_info] -tag_build = -tag_date = 0 -tag_svn_revision = 0 - -[tool:pytest] +[pytest] addopts = --import-mode=importlib -k "not _high_cost and not _skip" --ignore=examples @@ -13,3 +8,4 @@ addopts = --import-mode=importlib --ignore-glob="*test_bz*" --ignore-glob="*pbc/cc/test/*test_h_*.py" --ignore-glob="*test_ks_noimport*.py" + diff --git a/setup.py b/setup.py index e23d90fb58..cabb82e2ee 100755 --- a/setup.py +++ b/setup.py @@ -18,37 +18,6 @@ from setuptools import setup, find_packages, Extension from setuptools.command.build_py import build_py -CLASSIFIERS = [ -'Development Status :: 5 - Production/Stable', -'Intended Audience :: Science/Research', -'Intended Audience :: Developers', -'License :: OSI Approved :: Apache Software License', -'Programming Language :: C', -'Programming Language :: Python', -'Programming Language :: Python :: 3.7', -'Programming Language :: Python :: 3.8', -'Programming Language :: Python :: 3.9', -'Programming Language :: Python :: 3.10', -'Programming Language :: Python :: 3.11', -'Programming Language :: Python :: 3.12', -'Topic :: Software Development', -'Topic :: Scientific/Engineering', -'Operating System :: POSIX', -'Operating System :: Unix', -'Operating System :: MacOS', -] - -NAME = 'pyscf' -MAINTAINER = 'Qiming Sun' -MAINTAINER_EMAIL = 'osirpt.sun@gmail.com' -DESCRIPTION = 'PySCF: Python-based Simulations of Chemistry Framework' -#LONG_DESCRIPTION = '' -URL = 'http://www.pyscf.org' -DOWNLOAD_URL = 'http://github.com/pyscf/pyscf' -LICENSE = 'Apache License 2.0' -AUTHOR = 'Qiming Sun' -AUTHOR_EMAIL = 'osirpt.sun@gmail.com' -PLATFORMS = ['Linux', 'Mac OS-X', 'Unix'] def 
get_version(): topdir = os.path.abspath(os.path.join(__file__, '..')) with open(os.path.join(topdir, 'pyscf', '__init__.py'), 'r') as f: @@ -59,25 +28,6 @@ def get_version(): raise ValueError("Version string not found") VERSION = get_version() -EXTRAS = { - 'geomopt': ['pyberny>=0.6.2', 'geometric>=0.9.7.2', 'pyscf-qsdopt'], - #'dmrgscf': ['pyscf-dmrgscf'], - 'doci': ['pyscf-doci'], - 'icmpspt': ['pyscf-icmpspt'], - 'properties': ['pyscf-properties'], - 'semiempirical': ['pyscf-semiempirical'], - 'shciscf': ['pyscf-shciscf'], - 'cppe': ['cppe'], - 'pyqmc': ['pyqmc'], - 'mcfun': ['mcfun>=0.2.1'], - 'bse': ['basis-set-exchange'], -} -EXTRAS['all'] = [p for extras in EXTRAS.values() for p in extras] -# extras which should not be installed by "all" components -EXTRAS['cornell_shci'] = ['pyscf-cornell-shci'] -EXTRAS['nao'] = ['pyscf-nao'] -EXTRAS['fciqmcscf'] = ['pyscf-fciqmc'] -EXTRAS['tblis'] = ['pyscf-tblis'] def get_platform(): from distutils.util import get_platform @@ -117,9 +67,11 @@ def run(self): self.spawn(cmd) self.announce('Building binaries', level=3) - # Do not use high level parallel compilation. OOM may be triggered - # when compiling certain functionals in libxc. - cmd = ['cmake', '--build', self.build_temp, '-j', '2'] + # By default do not use high level parallel compilation. + # OOM may be triggered when compiling certain functionals in libxc. + # Set the shell variable CMAKE_BUILD_PARALLEL_LEVEL=n to enable + # parallel compilation. 
+ cmd = ['cmake', '--build', self.build_temp] build_args = os.getenv('CMAKE_BUILD_ARGS') if build_args: cmd.extend(build_args.split(' ')) @@ -150,27 +102,11 @@ def initialize_with_default_plat_name(self): 'https://github.com/scipy/scipy/issues/16151)') setup( - name=NAME, version=VERSION, - description=DESCRIPTION, - long_description_content_type="text/markdown", - long_description=DESCRIPTION, - url=URL, - download_url=DOWNLOAD_URL, - license=LICENSE, - classifiers=CLASSIFIERS, - author=AUTHOR, - author_email=AUTHOR_EMAIL, - platforms=PLATFORMS, #package_dir={'pyscf': 'pyscf'}, # packages are under directory pyscf #include *.so *.dat files. They are now placed in MANIFEST.in #package_data={'': ['*.so', '*.dylib', '*.dll', '*.dat']}, include_package_data=True, # include everything in source control packages=find_packages(exclude=['*test*', '*examples*']), cmdclass={'build_py': CMakeBuildPy}, - install_requires=['numpy>=1.13,!=1.16,!=1.17', - _scipy_version, - 'h5py>=2.7', - 'setuptools'], - extras_require=EXTRAS, ) From d57f1d6c89c723e11a7f0933380a6139ba372554 Mon Sep 17 00:00:00 2001 From: fishjojo Date: Mon, 8 Apr 2024 15:38:38 -0700 Subject: [PATCH 31/44] fix pbc df with KPoints input --- pyscf/pbc/df/df.py | 3 +++ pyscf/pbc/df/df_jk.py | 3 --- pyscf/pbc/df/fft.py | 3 +++ pyscf/pbc/df/mdf.py | 3 +++ pyscf/pbc/df/mdf_jk.py | 3 --- pyscf/pbc/df/test/test_df.py | 17 ++++++++++++++++- 6 files changed, 25 insertions(+), 7 deletions(-) diff --git a/pyscf/pbc/df/df.py b/pyscf/pbc/df/df.py index 55d5bd7f16..3791dacf8a 100644 --- a/pyscf/pbc/df/df.py +++ b/pyscf/pbc/df/df.py @@ -53,6 +53,7 @@ from pyscf.pbc.df import df_ao2mo from pyscf.pbc.df.aft import estimate_eta, _check_kpts from pyscf.pbc.df.df_jk import zdotCN +from pyscf.pbc.lib.kpts import KPoints from pyscf.pbc.lib.kpts_helper import (is_zero, gamma_point, member, unique, KPT_DIFF_TOL) from pyscf.pbc.df.gdf_builder import libpbc, _CCGDFBuilder, _CCNucBuilder @@ -146,6 +147,8 @@ def __init__(self, cell, 
kpts=numpy.zeros((1,3))): self.verbose = cell.verbose self.max_memory = cell.max_memory + if isinstance(kpts, KPoints): + kpts = kpts.kpts self.kpts = kpts # default is gamma point self.kpts_band = None self._auxbasis = None diff --git a/pyscf/pbc/df/df_jk.py b/pyscf/pbc/df/df_jk.py index 88c782b311..b556fd512a 100644 --- a/pyscf/pbc/df/df_jk.py +++ b/pyscf/pbc/df/df_jk.py @@ -28,7 +28,6 @@ from pyscf import lib from pyscf.lib import logger, zdotNN, zdotCN, zdotNC from pyscf.pbc import tools -from pyscf.pbc.lib.kpts import KPoints from pyscf.pbc.lib.kpts_helper import is_zero, gamma_point, member, get_kconserv_ria from pyscf import __config__ @@ -53,8 +52,6 @@ def density_fit(mf, auxbasis=None, mesh=None, with_df=None): else: kpts = numpy.reshape(mf.kpt, (1,3)) - if isinstance(kpts, KPoints): - kpts = kpts.kpts with_df = df.DF(mf.cell, kpts) with_df.max_memory = mf.max_memory with_df.stdout = mf.stdout diff --git a/pyscf/pbc/df/fft.py b/pyscf/pbc/df/fft.py index 1d538ae4ee..382f317990 100644 --- a/pyscf/pbc/df/fft.py +++ b/pyscf/pbc/df/fft.py @@ -30,6 +30,7 @@ from pyscf.pbc.df import fft_jk from pyscf.pbc.df import aft from pyscf.pbc.df.aft import _check_kpts +from pyscf.pbc.lib.kpts import KPoints from pyscf.pbc.lib.kpts_helper import is_zero from pyscf import __config__ @@ -168,6 +169,8 @@ def __init__(self, cell, kpts=numpy.zeros((1,3))): self.verbose = cell.verbose self.max_memory = cell.max_memory + if isinstance(kpts, KPoints): + kpts = kpts.kpts self.kpts = kpts self.grids = gen_grid.UniformGrids(cell) diff --git a/pyscf/pbc/df/mdf.py b/pyscf/pbc/df/mdf.py index 741f349410..6cb21cf649 100644 --- a/pyscf/pbc/df/mdf.py +++ b/pyscf/pbc/df/mdf.py @@ -38,6 +38,7 @@ from pyscf.pbc.df.gdf_builder import _CCGDFBuilder from pyscf.pbc.df.rsdf_builder import _RSGDFBuilder from pyscf.pbc.df.incore import libpbc, make_auxcell +from pyscf.pbc.lib.kpts import KPoints from pyscf.pbc.lib.kpts_helper import is_zero, member, unique from pyscf.pbc.df import mdf_jk from 
pyscf.pbc.df import mdf_ao2mo @@ -55,6 +56,8 @@ def __init__(self, cell, kpts=np.zeros((1,3))): self.verbose = cell.verbose self.max_memory = cell.max_memory + if isinstance(kpts, KPoints): + kpts = kpts.kpts self.kpts = kpts # default is gamma point self.kpts_band = None self._auxbasis = None diff --git a/pyscf/pbc/df/mdf_jk.py b/pyscf/pbc/df/mdf_jk.py index de305a736c..f3fe0e2ee4 100644 --- a/pyscf/pbc/df/mdf_jk.py +++ b/pyscf/pbc/df/mdf_jk.py @@ -26,7 +26,6 @@ from pyscf.lib import logger from pyscf.pbc.df import df_jk from pyscf.pbc.df import aft_jk -from pyscf.pbc.lib.kpts import KPoints # # Divide the Coulomb potential to two parts. Computing short range part in @@ -52,8 +51,6 @@ def density_fit(mf, auxbasis=None, mesh=None, with_df=None): else: kpts = numpy.reshape(mf.kpt, (1,3)) - if isinstance(kpts, KPoints): - kpts = kpts.kpts with_df = mdf.MDF(mf.cell, kpts) with_df.max_memory = mf.max_memory with_df.stdout = mf.stdout diff --git a/pyscf/pbc/df/test/test_df.py b/pyscf/pbc/df/test/test_df.py index 62a14500a2..20036dee82 100644 --- a/pyscf/pbc/df/test/test_df.py +++ b/pyscf/pbc/df/test/test_df.py @@ -20,7 +20,7 @@ from pyscf import ao2mo, gto from pyscf.pbc import gto as pgto from pyscf.pbc import scf as pscf -from pyscf.pbc.df import df, aug_etb, FFTDF +from pyscf.pbc.df import df, aug_etb, FFTDF, mdf from pyscf.pbc.df import gdf_builder #from mpi4pyscf.pbc.df import df pyscf.pbc.DEBUG = False @@ -250,6 +250,21 @@ def test_cell_with_cart(self): eri1 = df.GDF(cell).set(auxbasis=aug_etb(cell)).get_eri() self.assertAlmostEqual(abs(eri1-eri0).max(), 0, 2) + def test_kpoints_input(sef): + cell.space_group_symmetry = True + cell.build() + kpts = cell.make_kpts([2,2,2], + space_group_symmetry=True, + time_reversal_symmetry=True) + + mydf = df.GDF(cell, kpts=kpts) + assert mydf.kpts.shape == (8,3) + + mydf = FFTDF(cell, kpts=kpts) + assert mydf.kpts.shape == (8,3) + + mydf = mdf.MDF(cell, kpts=kpts) + assert mydf.kpts.shape == (8,3) if __name__ == '__main__': 
print("Full Tests for df") From 92defdf2b3efeaee02233ae9836904c5c52234e6 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Fri, 12 Apr 2024 18:49:19 -0700 Subject: [PATCH 32/44] Fix pip builder --- .github/workflows/ci.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1da9486bb1..6b74a10a5f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,7 @@ jobs: run: | docker run --rm -v ${{ github.workspace }}:/src/pyscf:rw --workdir=/src/pyscf ${{ env.img }} \ bash -exc '/opt/python/${{ matrix.pyver }}/bin/pip install --upgrade pip setuptools && \ - /opt/python/${{ matrix.pyver }}/bin/pip install "numpy!=1.16,!=1.17" "scipy!=1.5" h5py pytest pytest-cov pytest-timer pyberny geometric && \ + /opt/python/${{ matrix.pyver }}/bin/pip install "numpy!=1.16,!=1.17" "scipy!=1.5" h5py==3.10 pytest pytest-cov pytest-timer pyberny geometric && \ yum install -y epel-release && \ yum-config-manager --enable epel && \ yum install -y openblas-devel gcc cmake curl && \ diff --git a/pyproject.toml b/pyproject.toml index 7ba6770e34..926ba415d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools >= 61.0", "wheel"] +requires = ["setuptools >= 61.0", "wheel", "cmake"] build-backend = "setuptools.build_meta" From 7f0a1c3bca2bd97925547b4edcc34f722a26832a Mon Sep 17 00:00:00 2001 From: Hong-Zhou Ye Date: Sat, 13 Apr 2024 01:12:15 -0400 Subject: [PATCH 33/44] reset enuc in _build_supcell_ (#2164) * reset enuc in _build_supcell_ * add test for enuc reset --------- Co-authored-by: hongzhouye <> --- pyscf/pbc/tools/pbc.py | 1 + pyscf/pbc/tools/test/test_pbc.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pyscf/pbc/tools/pbc.py b/pyscf/pbc/tools/pbc.py index 20d45fe692..5151d4d4d5 100644 --- a/pyscf/pbc/tools/pbc.py +++ b/pyscf/pbc/tools/pbc.py @@ -676,6 +676,7 @@ def _build_supcell_(supcell, 
cell, Ls): x, y, z = coords.T supcell.atom = supcell._atom = list(zip(symbs, zip(x, y, z))) supcell.unit = 'B' + supcell.enuc = None # reset nuclear energy # Do not call supcell.build() to initialize supcell since it may normalize # the basis contraction coefficients diff --git a/pyscf/pbc/tools/test/test_pbc.py b/pyscf/pbc/tools/test/test_pbc.py index 3a8774f78e..52251aac79 100644 --- a/pyscf/pbc/tools/test/test_pbc.py +++ b/pyscf/pbc/tools/test/test_pbc.py @@ -144,9 +144,13 @@ def test_super_cell(self): mesh = [3]*3, atom ='''He .1 .0 .0''', basis = 'ccpvdz') - cl2 = tools.super_cell(cl1, [2,3,4]) + _ = cl1.enuc + ncopy = [2,3,4] + ncell = ncopy[0]*ncopy[1]*ncopy[2] + cl2 = tools.super_cell(cl1, ncopy) self.assertAlmostEqual(lib.fp(cl2.atom_coords()), -18.946080642714836, 9) self.assertAlmostEqual(lib.fp(cl2._bas[:,gto.ATOM_OF]), 16.515144238434807, 9) + self.assertAlmostEqual(cl1.enuc, cl2.enuc / ncell, 9) def test_super_cell_with_symm(self): cl1 = pbcgto.M(a = 1.4 * numpy.eye(3), From 77e13d0de49dde3bf3ee11a9a3ea5f9a86705a24 Mon Sep 17 00:00:00 2001 From: Hong-Zhou Ye Date: Sat, 13 Apr 2024 01:16:56 -0400 Subject: [PATCH 34/44] Bug fix for single k-point JK-build in PBC DF (#2165) * bug fix for j-build in get_jk * bug fix for k-build in get_jk --------- Co-authored-by: hongzhouye <> --- pyscf/pbc/df/aft_jk.py | 2 +- pyscf/pbc/df/df_jk.py | 8 ++++---- pyscf/pbc/df/test/test_aft_jk.py | 20 ++++++++++++++++++++ pyscf/pbc/df/test/test_df_jk.py | 15 +++++++++++++++ 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/pyscf/pbc/df/aft_jk.py b/pyscf/pbc/df/aft_jk.py index d6627545bd..f0f8acb229 100644 --- a/pyscf/pbc/df/aft_jk.py +++ b/pyscf/pbc/df/aft_jk.py @@ -732,7 +732,7 @@ def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), iLkI *= vkcoulG[p0:p1].reshape(1,nG,1) zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1), pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T, - 1, vkR[i], vkI[i]) + 1, vkR[i], vkI[i], 1) #t2 = log.timer_debug1(' with_k', *t2) pqkR = pqkI = 
pLqR = pLqI = iLkR = iLkI = None #t2 = log.timer_debug1('%d:%d'%(p0,p1), *t2) diff --git a/pyscf/pbc/df/df_jk.py b/pyscf/pbc/df/df_jk.py index b556fd512a..68bf20a010 100644 --- a/pyscf/pbc/df/df_jk.py +++ b/pyscf/pbc/df/df_jk.py @@ -1261,12 +1261,12 @@ def contract_k(pLqR, pLqI, sign): if with_j: #:rho_coeff = numpy.einsum('Lpq,xqp->xL', Lpq, dms) #:vj += numpy.dot(rho_coeff, Lpq.reshape(-1,nao**2)) - rhoR = numpy.einsum('Lpq,xpq->xL', LpqR, dmsR) + rhoR = numpy.einsum('Lpq,xqp->xL', LpqR, dmsR) if not j_real: LpqI = LpqI.reshape(-1,nao,nao) - rhoR -= numpy.einsum('Lpq,xpq->xL', LpqI, dmsI) - rhoI = numpy.einsum('Lpq,xpq->xL', LpqR, dmsI) - rhoI += numpy.einsum('Lpq,xpq->xL', LpqI, dmsR) + rhoR -= numpy.einsum('Lpq,xqp->xL', LpqI, dmsI) + rhoI = numpy.einsum('Lpq,xqp->xL', LpqR, dmsI) + rhoI += numpy.einsum('Lpq,xqp->xL', LpqI, dmsR) vjR += sign * numpy.einsum('xL,Lpq->xpq', rhoR, LpqR) if not j_real: vjR -= sign * numpy.einsum('xL,Lpq->xpq', rhoI, LpqI) diff --git a/pyscf/pbc/df/test/test_aft_jk.py b/pyscf/pbc/df/test/test_aft_jk.py index 7bd6d6915d..697c07f4b5 100644 --- a/pyscf/pbc/df/test/test_aft_jk.py +++ b/pyscf/pbc/df/test/test_aft_jk.py @@ -119,6 +119,26 @@ def test_jk(self): self.assertAlmostEqual(ej1, 12.233546641482697, 8) self.assertAlmostEqual(ek1, 43.946958026023722, 7) + def test_jk_complex_dm(self): + scaled_center = [0.3728,0.5524,0.7672] + kpt = cell.make_kpts([1,1,1], scaled_center=scaled_center)[0] + mf = scf.RHF(cell, kpt=kpt) + dm = mf.init_guess_by_1e() + + mydf = aft.AFTDF(cell, kpts=[kpt]) + vj1, vk1 = mydf.get_jk(dm, kpts=kpt, exxdiv='ewald') + vjs, vks = mydf.get_jk([dm], kpts=[kpt], exxdiv='ewald') + vj , vk = vjs[0], vks[0] + + ej1 = numpy.einsum('ij,ji->', vj1, dm) + ek1 = numpy.einsum('ij,ji->', vk1, dm) + ej = numpy.einsum('ij,ji->', vj , dm) + ek = numpy.einsum('ij,ji->', vk , dm) + + # kpts and single kpt AFTDF must match exactly + self.assertAlmostEqual(ej1, ej, 10) + self.assertAlmostEqual(ek1, ek, 10) + def test_aft_j(self): 
numpy.random.seed(1) nao = cell.nao_nr() diff --git a/pyscf/pbc/df/test/test_df_jk.py b/pyscf/pbc/df/test/test_df_jk.py index e9b5fa1e52..ed8c37a5d9 100644 --- a/pyscf/pbc/df/test/test_df_jk.py +++ b/pyscf/pbc/df/test/test_df_jk.py @@ -81,6 +81,21 @@ def test_jk_single_kpt(self): self.assertAlmostEqual(ej1, 25.8129854469354, 6) self.assertAlmostEqual(ek1, 72.6088517709998, 6) + def test_jk_single_kpt_complex_dm(self): + scaled_center = [0.3728,0.5524,0.7672] + kpt = cell0.make_kpts([1,1,1], scaled_center=scaled_center)[0] + mf = pscf.RHF(cell0, kpt=kpt).density_fit('weigend') + dm = mf.init_guess_by_1e() + with lib.temporary_env(mf.cell, incore_anyway=True): + vj1, vk1 = mf.get_jk(dm=dm) # from mol_hf.dot_eri_dm + ej1 = numpy.einsum('ij,ji->', vj1, dm) + ek1 = numpy.einsum('ij,ji->', vk1, dm) + vj, vk = mf.with_df.get_jk(dm=dm, kpts=kpt, exxdiv=mf.exxdiv) + ej = numpy.einsum('ij,ji->', vj, dm) + ek = numpy.einsum('ij,ji->', vk, dm) + self.assertAlmostEqual(ej1, ej, 10) + self.assertAlmostEqual(ek1, ek, 10) + def test_jk_single_kpt_high_cost(self): mf0 = pscf.RHF(cell) mf0.exxdiv = None From c549c46be5963f657a48da6ee908fb5ed3bdc870 Mon Sep 17 00:00:00 2001 From: Maximilian Scheurer Date: Fri, 12 Apr 2024 12:59:02 +0200 Subject: [PATCH 35/44] add missing init file --- pyscf/solvent/grad/__init__.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 pyscf/solvent/grad/__init__.py diff --git a/pyscf/solvent/grad/__init__.py b/pyscf/solvent/grad/__init__.py new file mode 100644 index 0000000000..2b02f141a1 --- /dev/null +++ b/pyscf/solvent/grad/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Qiming Sun +# \ No newline at end of file From 4f08ae58d842cfafc8a6bf1144d19d4a478958b9 Mon Sep 17 00:00:00 2001 From: Xiaojie Wu Date: Mon, 15 Apr 2024 12:35:03 -0700 Subject: [PATCH 36/44] DFTD3 & DFTD4 builder (#2161) * local build dftd3/dftd4 * flake8 * fixed issues apis * add compilation tool for mac * fixed bugs in unittest * fixed an issue in unit test * add fortran compiler * fortran compiler * Move dftd3 and dftd4 builder to a separated repo (pyscf-dispersion) * enable dftd3 & dftd4 for python3.12 * Adjust tests --------- Co-authored-by: Qiming Sun --- .github/workflows/ci_linux/python_deps.sh | 3 +- .github/workflows/ci_macos/deps_apt.sh | 1 - pyscf/dft/test/test_h2o.py | 16 ++--- pyscf/grad/dispersion.py | 19 +++--- pyscf/grad/test/test_rhf.py | 14 ++--- pyscf/grad/test/test_rks.py | 55 +++++++++++++++++ pyscf/grad/test/test_uhf.py | 73 +++++++++++++++++++++++ pyscf/grad/test/test_uks.py | 32 ++++++++++ pyscf/hessian/dispersion.py | 27 +++++---- pyscf/hessian/test/test_rhf.py | 33 ++++++++++ pyscf/hessian/test/test_rks.py | 25 +++----- pyscf/hessian/test/test_uhf.py | 34 ++++++++++- pyscf/hessian/test/test_uks.py | 62 +++++++++++++++---- pyscf/lib/CMakeLists.txt | 2 +- pyscf/scf/dispersion.py | 35 ++++------- 15 files changed, 334 insertions(+), 97 deletions(-) diff --git a/.github/workflows/ci_linux/python_deps.sh b/.github/workflows/ci_linux/python_deps.sh index 3f52c7cb8a..d772db29e6 100755 --- a/.github/workflows/ci_linux/python_deps.sh +++ b/.github/workflows/ci_linux/python_deps.sh @@ -2,13 +2,12 @@ python -m pip install 
--upgrade pip pip install "numpy!=1.16,!=1.17" "scipy!=1.5" h5py pytest pytest-cov pytest-timer pip install pyberny +pip install --no-deps pyscf-dispersion version=$(python -c 'import sys; version=sys.version_info[:2]; print("{0}.{1}".format(*version))') if [ $version != '3.12' ]; then pip install geometric pip install spglib - pip install dftd3 - pip install dftd4 fi #cppe diff --git a/.github/workflows/ci_macos/deps_apt.sh b/.github/workflows/ci_macos/deps_apt.sh index 742e13d6fd..f1f641af19 100755 --- a/.github/workflows/ci_macos/deps_apt.sh +++ b/.github/workflows/ci_macos/deps_apt.sh @@ -1,2 +1 @@ #!/usr/bin/env bash -exit 0 diff --git a/pyscf/dft/test/test_h2o.py b/pyscf/dft/test/test_h2o.py index 1c1c1ececf..c12b295ead 100644 --- a/pyscf/dft/test/test_h2o.py +++ b/pyscf/dft/test/test_h2o.py @@ -18,18 +18,10 @@ from pyscf import gto from pyscf import lib from pyscf import dft - - -import sys -try: - import dftd3 -except ImportError: - pass - try: - import dftd4 + from pyscf.dispersion import dftd3, dftd4 except ImportError: - pass + dftd3 = dftd4 = None def setUpModule(): global h2o, h2osym, h2o_cation, h2osym_cation @@ -501,7 +493,7 @@ def test_nr_uks_vv10_high_cost(self): method.nlcgrids.atom_grid = {"H": (40, 110), "O": (40, 110),} self.assertAlmostEqual(method.scf(), -76.352381513158718, 8) - @unittest.skipIf('dftd3' not in sys.modules, "requires the dftd3 library") + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") def test_dft_parser(self): from pyscf.scf import dispersion method = dft.RKS(h2o, xc='wb97m-d3bj') @@ -553,7 +545,7 @@ def test_camb3lyp_rsh_omega(self): mf2.kernel() self.assertAlmostEqual(mf1.e_tot, -76.36649222362115, 9) - @unittest.skipIf('dftd3' not in sys.modules, "requires the dftd3 library") + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") def test_dispersion(self): mf = dft.RKS(h2o) mf.xc = 'B3LYP' diff --git a/pyscf/grad/dispersion.py b/pyscf/grad/dispersion.py index 2cd5fe705c..dc8ca7965c 100644 --- 
a/pyscf/grad/dispersion.py +++ b/pyscf/grad/dispersion.py @@ -26,6 +26,11 @@ def get_dispersion(mf_grad, disp_version=None, with_3body=False): '''gradient of dispersion correction for RHF/RKS''' + try: + from pyscf.dispersion import dftd3, dftd4 + except ImportError: + print('dftd3 and dftd4 not available. Install them with `pip install pyscf-dispersion`') + raise mf = mf_grad.base mol = mf.mol if isinstance(mf, KohnShamDFT): @@ -49,16 +54,14 @@ def get_dispersion(mf_grad, disp_version=None, with_3body=False): with_3body = mf.disp_with_3body if disp_version[:2].upper() == 'D3': - # raised error in SCF module, assuming dftd3 installed - import dftd3.pyscf as disp - d3 = disp.DFTD3Dispersion(mol, xc=method, version=disp_version, atm=with_3body) - _, g_d3 = d3.kernel() + d3_model = dftd3.DFTD3Dispersion(mol, xc=method, version=disp_version, atm=with_3body) + res = d3_model.get_dispersion(grad=True) + g_d3 = res.get('gradient') return g_d3 elif disp_version[:2].upper() == 'D4': - # raised error in SCF module, assuming dftd3 installed - import dftd4.pyscf as disp - d4 = disp.DFTD4Dispersion(mol, xc=method, atm=with_3body) - _, g_d4 = d4.kernel() + d4_model = dftd4.DFTD4Dispersion(mol, xc=method, atm=with_3body) + res = d4_model.get_dispersion(grad=True) + g_d4 = res.get('gradient') return g_d4 else: raise RuntimeError(f'dispersion correction: {disp_version} is not supported.') diff --git a/pyscf/grad/test/test_rhf.py b/pyscf/grad/test/test_rhf.py index 263be449b5..087e6cf40e 100644 --- a/pyscf/grad/test/test_rhf.py +++ b/pyscf/grad/test/test_rhf.py @@ -18,16 +18,10 @@ from pyscf import gto, scf, lib from pyscf import grad -import sys try: - import dftd3 + from pyscf.dispersion import dftd3, dftd4 except ImportError: - pass - -try: - import dftd4 -except ImportError: - pass + dftd3 = dftd4 = None def setUpModule(): global mol @@ -82,7 +76,7 @@ def test_df_rhf_grad(self): e2 = mfs('O 0. 0. 0.001; H 0. -0.757 0.587; H 0. 
0.757 0.587') self.assertAlmostEqual(g[0,2], (e2-e1)/0.002*lib.param.BOHR, 5) - @unittest.skipIf('dftd3' not in sys.modules, "requires the dftd3 library") + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") def test_rhf_d3_grad(self): mf = scf.RHF(mol) mf.disp = 'd3bj' @@ -94,7 +88,7 @@ def test_rhf_d3_grad(self): e2 = mf_scan('O 0. 0. 0.001; H 0. -0.757 0.587; H 0. 0.757 0.587') self.assertAlmostEqual((e2-e1)/0.002*lib.param.BOHR, g[0,2], 5) - @unittest.skipIf('dftd4' not in sys.modules, "requires the dftd4 library") + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") def test_rhf_d4_grad(self): mf = scf.RHF(mol) mf.disp = 'd4' diff --git a/pyscf/grad/test/test_rks.py b/pyscf/grad/test/test_rks.py index fe760651de..456e11cdc4 100644 --- a/pyscf/grad/test/test_rks.py +++ b/pyscf/grad/test/test_rks.py @@ -18,6 +18,11 @@ from pyscf import gto, dft, lib from pyscf.dft import radi from pyscf.grad import rks +try: + from pyscf.dispersion import dftd3, dftd4 +except ImportError: + dftd3 = dftd4 = None + def grids_response(grids): # JCP 98, 5612 (1993); DOI:10.1063/1.464906 @@ -189,6 +194,30 @@ def test_finite_diff_rks_grad(self): e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) + def test_fnite_diff_rks_d3_grad(self): + mol1 = mol.copy() + mf = dft.RKS(mol) + mf.conv_tol = 1e-14 + mf.kernel() + g = mf.nuc_grad_method().set(grid_response=True).kernel() + + mf_scanner = mf.as_scanner() + e1 = mf_scanner(mol1.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 
0.757 0.587')) + self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) + + def test_fnite_diff_rks_d4_grad(self): + mol1 = mol.copy() + mf = dft.RKS(mol) + mf.conv_tol = 1e-14 + mf.kernel() + g = mf.nuc_grad_method().set(grid_response=True).kernel() + + mf_scanner = mf.as_scanner() + e1 = mf_scanner(mol1.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) + def test_finite_diff_df_rks_grad(self): mf1 = mf.density_fit ().run () g = mf1.nuc_grad_method ().set (grid_response=True).kernel () @@ -200,6 +229,32 @@ def test_finite_diff_df_rks_grad(self): e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_finite_diff_df_rks_d3_grad(self): + mf1 = mf.density_fit () + mf1.disp = 'd3bj' + mf1.kernel() + g = mf1.nuc_grad_method ().set (grid_response=True).kernel () + + mol1 = mol.copy() + mf_scanner = mf1.as_scanner() + e1 = mf_scanner(mol1.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) + + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_finite_diff_df_rks_d4_grad(self): + mf1 = mf.density_fit () + mf1.disp = 'd4' + mf1.kernel() + g = mf1.nuc_grad_method ().set (grid_response=True).kernel () + + mol1 = mol.copy() + mf_scanner = mf1.as_scanner() + e1 = mf_scanner(mol1.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 
0.757 0.587')) + self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) + def test_rks_grad_lda(self): mol_hf = gto.Mole() mol_hf.atom = [ diff --git a/pyscf/grad/test/test_uhf.py b/pyscf/grad/test/test_uhf.py index c6c055bf05..25d3885031 100644 --- a/pyscf/grad/test/test_uhf.py +++ b/pyscf/grad/test/test_uhf.py @@ -17,6 +17,11 @@ import numpy from pyscf import gto, scf, lib from pyscf import grad +try: + from pyscf.dispersion import dftd3, dftd4 +except ImportError: + dftd3 = dftd4 = None + def setUpModule(): global mol, mol1 @@ -104,6 +109,40 @@ def test_finite_diff_uhf_grad(self): H -0.43459905 0.65805058 -0.00861418''') self.assertAlmostEqual(g[2,1], (e2-e1)/2e-4*lib.param.BOHR, 7) + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_finite_diff_uhf_d3_grad(self): + mf = scf.UHF(mol) + mf.disp = 'd3bj' + mf.conv_tol = 1e-14 + e0 = mf.kernel() + g = grad.UHF(mf).kernel() + mf_scanner = mf.as_scanner() + + e1 = mf_scanner('''O 0. 0. 0. + 1 0. -0.758 0.587 + 1 0. 0.757 0.587''') + e2 = mf_scanner('''O 0. 0. 0. + 1 0. -0.756 0.587 + 1 0. 0.757 0.587''') + self.assertAlmostEqual(g[1,1], (e2-e1)/2e-3*lib.param.BOHR, 5) + + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_finite_diff_uhf_d4_grad(self): + mf = scf.UHF(mol) + mf.disp = 'd4' + mf.conv_tol = 1e-14 + e0 = mf.kernel() + g = grad.UHF(mf).kernel() + mf_scanner = mf.as_scanner() + + e1 = mf_scanner('''O 0. 0. 0. + 1 0. -0.758 0.587 + 1 0. 0.757 0.587''') + e2 = mf_scanner('''O 0. 0. 0. + 1 0. -0.756 0.587 + 1 0. 
0.757 0.587''') + self.assertAlmostEqual(g[1,1], (e2-e1)/2e-3*lib.param.BOHR, 5) + def test_finite_diff_df_uhf_grad(self): mf = scf.UHF(mol).density_fit () mf.conv_tol = 1e-14 @@ -157,6 +196,40 @@ def test_finite_diff_df_uhf_grad(self): H -0.43459905 0.65805058 -0.00861418''') self.assertAlmostEqual(g[2,1], (e2-e1)/2e-4*lib.param.BOHR, 7) + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_finite_diff_df_uhf_d4_grad(self): + mf = scf.UHF(mol).density_fit () + mf.conv_tol = 1e-14 + mf.disp = 'd3bj' + e0 = mf.kernel() + g = mf.nuc_grad_method ().kernel() + mf_scanner = mf.as_scanner() + + e1 = mf_scanner('''O 0. 0. 0. + 1 0. -0.758 0.587 + 1 0. 0.757 0.587''') + e2 = mf_scanner('''O 0. 0. 0. + 1 0. -0.756 0.587 + 1 0. 0.757 0.587''') + self.assertAlmostEqual(g[1,1], (e2-e1)/2e-3*lib.param.BOHR, 5) + + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_finite_diff_df_uhf_d4_grad(self): + mf = scf.UHF(mol).density_fit () + mf.conv_tol = 1e-14 + mf.disp = 'd4' + e0 = mf.kernel() + g = mf.nuc_grad_method ().kernel() + mf_scanner = mf.as_scanner() + + e1 = mf_scanner('''O 0. 0. 0. + 1 0. -0.758 0.587 + 1 0. 0.757 0.587''') + e2 = mf_scanner('''O 0. 0. 0. + 1 0. -0.756 0.587 + 1 0. 0.757 0.587''') + self.assertAlmostEqual(g[1,1], (e2-e1)/2e-3*lib.param.BOHR, 5) + def test_uhf_grad_one_atom(self): mol = gto.Mole() mol.atom = [['He', (0.,0.,0.)], ] diff --git a/pyscf/grad/test/test_uks.py b/pyscf/grad/test/test_uks.py index effd7218a7..9a763444ba 100644 --- a/pyscf/grad/test/test_uks.py +++ b/pyscf/grad/test/test_uks.py @@ -18,6 +18,10 @@ from pyscf import gto, dft, lib from pyscf.dft import radi from pyscf.grad import uks +try: + from pyscf.dispersion import dftd3, dftd4 +except ImportError: + dftd3 = dftd4 = None def setUpModule(): @@ -73,6 +77,34 @@ def test_finite_diff_df_uks_grad(self): e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 
0.757 0.587')) self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_fnite_diff_uks_d3_grad(self): + mol1 = mol.copy() + mf = dft.UKS(mol) + mf.disp = 'd3bj' + mf.conv_tol = 1e-14 + mf.kernel() + g = mf.nuc_grad_method().set(grid_response=True).kernel() + + mf_scanner = mf.as_scanner() + e1 = mf_scanner(mol1.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) + + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_fnite_diff_uks_d4_grad(self): + mol1 = mol.copy() + mf = dft.UKS(mol) + mf.disp = 'd4' + mf.conv_tol = 1e-14 + mf.kernel() + g = mf.nuc_grad_method().set(grid_response=True).kernel() + + mf_scanner = mf.as_scanner() + e1 = mf_scanner(mol1.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) + self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) + def test_uks_grad_lda(self): mol = gto.Mole() mol.atom = [ diff --git a/pyscf/hessian/dispersion.py b/pyscf/hessian/dispersion.py index 728b01e2cf..060d5b1a10 100644 --- a/pyscf/hessian/dispersion.py +++ b/pyscf/hessian/dispersion.py @@ -26,6 +26,11 @@ from pyscf.dft import dft_parser def get_dispersion(hessobj, disp_version=None, with_3body=False): + try: + from pyscf.dispersion import dftd3, dftd4 + except ImportError: + print('dftd3 and dftd4 not available. 
Install them with `pip install pyscf-dispersion`') + raise mf = hessobj.base mol = mf.mol if isinstance(mf, KohnShamDFT): @@ -51,7 +56,6 @@ def get_dispersion(hessobj, disp_version=None, with_3body=False): with_3body = mf.disp_with_3body if mf.disp[:2].upper() == 'D3': - import dftd3.pyscf as disp coords = hessobj.mol.atom_coords() mol = mol.copy() eps = 1e-5 @@ -59,20 +63,21 @@ def get_dispersion(hessobj, disp_version=None, with_3body=False): for j in range(3): coords[i,j] += eps mol.set_geom_(coords, unit='Bohr') - d3 = disp.DFTD3Dispersion(mol, xc=method, version=mf.disp, atm=with_3body) - _, g1 = d3.kernel() + d3_model = dftd3.DFTD3Dispersion(mol, xc=method, version=mf.disp, atm=with_3body) + res = d3_model.get_dispersion(grad=True) + g1 = res.get('gradient') coords[i,j] -= 2.0*eps mol.set_geom_(coords, unit='Bohr') - d3 = disp.DFTD3Dispersion(mol, xc=method, version=mf.disp, atm=with_3body) - _, g2 = d3.kernel() + d3_model = dftd3.DFTD3Dispersion(mol, xc=method, version=mf.disp, atm=with_3body) + res = d3_model.get_dispersion(grad=True) + g2 = res.get('gradient') coords[i,j] += eps h_disp[i,:,j,:] = (g1 - g2)/(2.0*eps) return h_disp elif mf.disp[:2].upper() == 'D4': - import dftd4.pyscf as disp coords = hessobj.mol.atom_coords() mol = mol.copy() eps = 1e-5 @@ -80,13 +85,15 @@ def get_dispersion(hessobj, disp_version=None, with_3body=False): for j in range(3): coords[i,j] += eps mol.set_geom_(coords, unit='Bohr') - d4 = disp.DFTD4Dispersion(mol, xc=method, atm=with_3body) - _, g1 = d4.kernel() + d4_model = dftd4.DFTD4Dispersion(mol, xc=method, atm=with_3body) + res = d4_model.get_dispersion(grad=True) + g1 = res.get('gradient') coords[i,j] -= 2.0*eps mol.set_geom_(coords, unit='Bohr') - d4 = disp.DFTD4Dispersion(mol, xc=method, atm=with_3body) - _, g2 = d4.kernel() + d4_model = dftd4.DFTD4Dispersion(mol, xc=method, atm=with_3body) + res = d4_model.get_dispersion(grad=True) + g2 = res.get('gradient') coords[i,j] += eps h_disp[i,:,j,:] = (g1 - g2)/(2.0*eps) diff 
--git a/pyscf/hessian/test/test_rhf.py b/pyscf/hessian/test/test_rhf.py index 7f3bfdb2f9..b0c3cbbeda 100644 --- a/pyscf/hessian/test/test_rhf.py +++ b/pyscf/hessian/test/test_rhf.py @@ -17,6 +17,10 @@ import numpy from pyscf import gto, scf, lib from pyscf import grad, hessian +try: + from pyscf.dispersion import dftd3, dftd4 +except ImportError: + dftd3 = dftd4 = None def setUpModule(): global mol @@ -32,6 +36,7 @@ def setUpModule(): def tearDownModule(): global mol + mol.stdout.close() del mol class KnownValues(unittest.TestCase): @@ -86,6 +91,34 @@ def test_finite_diff_rhf_hess(self): e2 = g_scanner(pmol.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 4) + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_finite_diff_rhf_d3_hess(self): + mf = scf.RHF(mol) + mf.conv_tol = 1e-14 + mf.disp = 'd3bj' + e0 = mf.kernel() + hess = hessian.RHF(mf).kernel() + + g_scanner = mf.nuc_grad_method().as_scanner() + pmol = mol.copy() + e1 = g_scanner(pmol.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + e2 = g_scanner(pmol.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 4) + + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_finite_diff_rhf_d4_hess_high_cost(self): + mf = scf.RHF(mol) + mf.conv_tol = 1e-14 + mf.disp = 'd4' + e0 = mf.kernel() + hess = hessian.RHF(mf).kernel() + + g_scanner = mf.nuc_grad_method().as_scanner() + pmol = mol.copy() + e1 = g_scanner(pmol.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + e2 = g_scanner(pmol.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 4) + # e1 = g_scanner(pmol.set_geom_('O 0. 0.0001 0.; 1 0. -0.757 0.587; 1 0. 
0.757 0.587'))[1] # e2 = g_scanner(pmol.set_geom_('O 0. -.0001 0.; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] # self.assertAlmostEqual(abs(hess[0,:,1] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 4) diff --git a/pyscf/hessian/test/test_rks.py b/pyscf/hessian/test/test_rks.py index ab7389f015..5d6a976ad8 100644 --- a/pyscf/hessian/test/test_rks.py +++ b/pyscf/hessian/test/test_rks.py @@ -17,17 +17,10 @@ import numpy from pyscf import gto, dft, lib from pyscf import grad, hessian - -import sys -try: - import dftd3 -except ImportError: - pass - try: - import dftd4 + from pyscf.dispersion import dftd3, dftd4 except ImportError: - pass + dftd3 = dftd4 = None def setUpModule(): global mol, h4 @@ -43,6 +36,7 @@ def setUpModule(): h4 = gto.Mole() h4.verbose = 0 + h4.output = '/dev/null' h4.atom = [ [1 , (1. , 0. , 0.000)], [1 , (0. , 1. , 0.000)], @@ -55,6 +49,7 @@ def setUpModule(): def tearDownModule(): global mol, h4 mol.stdout.close() + h4.stdout.close() del mol, h4 def finite_diff(mf): @@ -127,15 +122,14 @@ def test_finite_diff_b3lyp_hess(self): #FIXME: errors seems too big self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 3) - @unittest.skipIf('dftd3' not in sys.modules, "requires the dftd3 library") - def test_finite_diff_b3lyp_d3_hess(self): + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_finite_diff_b3lyp_d3_hess_high_cost(self): mf = dft.RKS(mol) mf.conv_tol = 1e-14 mf.xc = 'b3lyp' mf.disp = 'd3bj' - e0 = mf.kernel() + mf.kernel() hess = mf.Hessian().kernel() - self.assertAlmostEqual(lib.fp(hess), -0.7586078053657133, 6) g_scanner = mf.nuc_grad_method().as_scanner() pmol = mol.copy() @@ -144,15 +138,14 @@ def test_finite_diff_b3lyp_d3_hess(self): #FIXME: errors seems too big self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 3) - @unittest.skipIf('dftd4' not in sys.modules, "requires the dftd4 library") + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") def 
test_finite_diff_b3lyp_d4_hess(self): mf = dft.RKS(mol) mf.conv_tol = 1e-14 mf.xc = 'b3lyp' mf.disp = 'd4' - e0 = mf.kernel() + mf.kernel() hess = mf.Hessian().kernel() - self.assertAlmostEqual(lib.fp(hess), -0.7588415571313422, 6) g_scanner = mf.nuc_grad_method().as_scanner() pmol = mol.copy() diff --git a/pyscf/hessian/test/test_uhf.py b/pyscf/hessian/test/test_uhf.py index 06d32b38ad..64b7765603 100644 --- a/pyscf/hessian/test/test_uhf.py +++ b/pyscf/hessian/test/test_uhf.py @@ -17,6 +17,10 @@ import numpy from pyscf import gto, scf, lib from pyscf import grad, hessian +try: + from pyscf.dispersion import dftd3, dftd4 +except ImportError: + dftd3 = dftd4 = None def setUpModule(): global mol @@ -33,6 +37,7 @@ def setUpModule(): def tearDownModule(): global mol + mol.stdout.close() del mol class KnownValues(unittest.TestCase): @@ -45,7 +50,7 @@ def test_uhf_hess(self): hess = hobj.kernel() self.assertAlmostEqual(lib.fp(hess), -0.20243405976628576, 5) - def test_finite_diff_rhf_hess(self): + def test_finite_diff_uhf_hess(self): mf = scf.UHF(mol) mf.conv_tol = 1e-14 e0 = mf.kernel() @@ -58,6 +63,33 @@ def test_finite_diff_rhf_hess(self): e2 = g_scanner(pmol.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 4) + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_finite_diff_uhf_d3_hess(self): + mf = scf.UHF(mol) + mf.conv_tol = 1e-14 + mf.disp = 'd3bj' + e0 = mf.kernel() + hess = mf.Hessian().kernel() + + g_scanner = mf.nuc_grad_method().as_scanner() + pmol = mol.copy() + e1 = g_scanner(pmol.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + e2 = g_scanner(pmol.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 
0.757 0.587'))[1] + self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 4) + + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_finite_diff_uhf_d4_hess(self): + mf = scf.UHF(mol) + mf.conv_tol = 1e-14 + mf.disp = 'd4' + e0 = mf.kernel() + hess = mf.Hessian().kernel() + + g_scanner = mf.nuc_grad_method().as_scanner() + pmol = mol.copy() + e1 = g_scanner(pmol.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + e2 = g_scanner(pmol.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 4) if __name__ == "__main__": print("Full Tests for UHF Hessian") diff --git a/pyscf/hessian/test/test_uks.py b/pyscf/hessian/test/test_uks.py index e2394bef2e..f35ef29870 100644 --- a/pyscf/hessian/test/test_uks.py +++ b/pyscf/hessian/test/test_uks.py @@ -17,9 +17,13 @@ import numpy from pyscf import gto, dft, lib from pyscf import grad, hessian +try: + from pyscf.dispersion import dftd3, dftd4 +except ImportError: + dftd3 = dftd4 = None def setUpModule(): - global mol + global mol, h4 mol = gto.Mole() mol.verbose = 5 mol.output = '/dev/null' @@ -32,21 +36,23 @@ def setUpModule(): mol.spin = 1 mol.build() -h4 = gto.Mole() -h4.verbose = 0 -h4.atom = [ - [1 , (1. , 0. , 0.000)], - [1 , (0. , 1. , 0.000)], - [1 , (0. , -1.517 , 1.177)], - [1 , (0. , 1.517 , 1.177)]] -h4.basis = '631g' -h4.spin = 2 -h4.unit = 'B' -h4.build() + h4 = gto.Mole() + h4.verbose = 0 + h4.output = '/dev/null' + h4.atom = [ + [1 , (1. , 0. , 0.000)], + [1 , (0. , 1. , 0.000)], + [1 , (0. , -1.517 , 1.177)], + [1 , (0. 
, 1.517 , 1.177)]] + h4.basis = '631g' + h4.spin = 2 + h4.unit = 'B' + h4.build() def tearDownModule(): global mol, h4 mol.stdout.close() + h4.stdout.close() del mol, h4 def finite_diff(mf): @@ -119,6 +125,38 @@ def test_finite_diff_b3lyp_hess(self): #FIXME: errors seems too big self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 3) + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_finite_diff_b3lyp_d3_hess_high_cost(self): + mf = dft.UKS(mol) + mf.conv_tol = 1e-14 + mf.xc = 'b3lyp' + mf.disp = 'd3bj' + mf.kernel() + hess = mf.Hessian().kernel() + + g_scanner = mf.nuc_grad_method().as_scanner() + pmol = mol.copy() + e1 = g_scanner(pmol.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + e2 = g_scanner(pmol.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + #FIXME: errors seems too big + self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 3) + + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_finite_diff_b3lyp_d4_hess_high_cost(self): + mf = dft.UKS(mol) + mf.conv_tol = 1e-14 + mf.xc = 'b3lyp' + mf.disp = 'd4' + mf.kernel() + hess = mf.Hessian().kernel() + + g_scanner = mf.nuc_grad_method().as_scanner() + pmol = mol.copy() + e1 = g_scanner(pmol.set_geom_('O 0. 0. 0.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587'))[1] + e2 = g_scanner(pmol.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 
0.757 0.587'))[1] + #FIXME: errors seems too big + self.assertAlmostEqual(abs(hess[0,:,2] - (e1-e2)/2e-4*lib.param.BOHR).max(), 0, 3) + def test_finite_diff_wb97x_hess(self): mf = dft.UKS(mol) mf.conv_tol = 1e-14 diff --git a/pyscf/lib/CMakeLists.txt b/pyscf/lib/CMakeLists.txt index 4b7236535e..84e7622019 100644 --- a/pyscf/lib/CMakeLists.txt +++ b/pyscf/lib/CMakeLists.txt @@ -168,7 +168,7 @@ if(BUILD_LIBCINT) set(LIBCINT_GIT https://github.com/sunqm/qcint.git) # qcint is an optimized implementation for x86-64 architecture set(LIBCINT_VERSION v6.1.2) if(NOT BUILD_MARCH_NATIVE) - message(WARNING "The BUILD_MARCH_NATIVE option is not specified! qcint may not compile unless you explicitly pass compiler flags that turn on vectorization!") + message(WARNING "The BUILD_MARCH_NATIVE option is not specified! qcint may not compile unless you explicitly pass compiler flags that turn on vectorization!") endif() endif() diff --git a/pyscf/scf/dispersion.py b/pyscf/scf/dispersion.py index 94e9018ff7..5b5af27831 100644 --- a/pyscf/scf/dispersion.py +++ b/pyscf/scf/dispersion.py @@ -24,6 +24,11 @@ from pyscf.dft import dft_parser def get_dispersion(mf, disp_version=None): + try: + from pyscf.dispersion import dftd3, dftd4 + except ImportError: + print('dftd3 and dftd4 not available. 
Install them with `pip install pyscf-dispersion`') + raise mol = mf.mol if isinstance(mf, KohnShamDFT): method = mf.xc @@ -46,35 +51,17 @@ def get_dispersion(mf, disp_version=None): # for dftd3 if disp_version[:2].upper() == 'D3': - try: - import dftd3.pyscf as disp - except ImportError: - raise ImportError("\n \ -cannot find dftd3 in the current environment.\n \ -please install dftd3 via \n \ -**************************************\n\ - pip3 install dftd3 \n \ -**************************************") - - d3 = disp.DFTD3Dispersion(mol, xc=method, version=disp_version, atm=with_3body) - e_d3, _ = d3.kernel() + d3_model = dftd3.DFTD3Dispersion(mol, xc=method, version=disp_version, atm=with_3body) + res = d3_model.get_dispersion() + e_d3 = res.get('energy') mf.scf_summary['dispersion'] = e_d3 return e_d3 # for dftd4 elif disp_version[:2].upper() == 'D4': - try: - import dftd4.pyscf as disp - except ImportError: - raise ImportError("\n \ -cannot find dftd4 in the current environment. \n \ -please install dftd4 via \n \ -***************************************\n \ - pip3 install dftd4 \n \ -***************************************") - - d4 = disp.DFTD4Dispersion(mol, xc=method, atm=with_3body) - e_d4, _ = d4.kernel() + d4_model = dftd4.DFTD4Dispersion(mol, xc=method, atm=with_3body) + res = d4_model.get_dispersion() + e_d4 = res.get('energy') mf.scf_summary['dispersion'] = e_d4 return e_d4 else: From 16d547143f3ab5d8733a177e4f70d2b01b370da5 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Tue, 9 Apr 2024 16:44:03 -0700 Subject: [PATCH 37/44] Kpoint dft to_hf error (fix issue #2157) --- pyscf/pbc/dft/kgks.py | 11 +++++++++-- pyscf/pbc/dft/krks.py | 28 ++++++++-------------------- pyscf/pbc/dft/kroks.py | 28 ++++++++-------------------- pyscf/pbc/dft/kuks.py | 28 ++++++++-------------------- pyscf/pbc/dft/test/test_kgks.py | 14 ++++++++++++++ pyscf/pbc/dft/test/test_krks.py | 26 ++++++++++++++++++++++++++ pyscf/pbc/dft/test/test_kuks.py | 14 ++++++++++++++ 7 files changed, 
87 insertions(+), 62 deletions(-) diff --git a/pyscf/pbc/dft/kgks.py b/pyscf/pbc/dft/kgks.py index 7774f2e36e..fb9e1f2f68 100644 --- a/pyscf/pbc/dft/kgks.py +++ b/pyscf/pbc/dft/kgks.py @@ -146,7 +146,14 @@ def nuc_grad_method(self): def to_hf(self): '''Convert to KGHF object.''' - from pyscf.pbc import scf - return self._transfer_attrs_(scf.KGHF(self.cell, self.kpts)) + from pyscf.pbc import scf, df + out = self._transfer_attrs_(scf.KGHF(self.cell, self.kpts)) + + # Pure functionals only construct J-type integrals. Enable all integrals for KHF. + if (not self._numint.libxc.is_hybrid_xc(self.xc) and + len(self.kpts) > 1 and getattr(self.with_df, '_j_only', False)): + out.with_df._j_only = False + out.with_df.reset() + return out to_gpu = lib.to_gpu diff --git a/pyscf/pbc/dft/krks.py b/pyscf/pbc/dft/krks.py index 6278605de9..d690647879 100644 --- a/pyscf/pbc/dft/krks.py +++ b/pyscf/pbc/dft/krks.py @@ -181,25 +181,13 @@ def nuc_grad_method(self): def to_hf(self): '''Convert to KRHF object.''' - from pyscf.pbc import scf - return self._transfer_attrs_(scf.KRHF(self.cell, self.kpts)) + from pyscf.pbc import scf, df + out = self._transfer_attrs_(scf.KRHF(self.cell, self.kpts)) + # Pure functionals only construct J-type integrals. Enable all integrals for KHF. + if (not self._numint.libxc.is_hybrid_xc(self.xc) and + len(self.kpts) > 1 and getattr(self.with_df, '_j_only', False)): + out.with_df._j_only = False + out.with_df.reset() + return out to_gpu = lib.to_gpu - - -if __name__ == '__main__': - from pyscf.pbc import gto - cell = gto.Cell() - cell.unit = 'A' - cell.atom = 'C 0., 0., 0.; C 0.8917, 0.8917, 0.8917' - cell.a = '''0. 1.7834 1.7834 - 1.7834 0. 1.7834 - 1.7834 1.7834 0. 
''' - - cell.basis = 'gth-szv' - cell.pseudo = 'gth-pade' - cell.verbose = 7 - cell.output = '/dev/null' - cell.build() - mf = KRKS(cell, cell.make_kpts([2,1,1])) - print(mf.kernel()) diff --git a/pyscf/pbc/dft/kroks.py b/pyscf/pbc/dft/kroks.py index 1d2a1198f8..640bf05e97 100644 --- a/pyscf/pbc/dft/kroks.py +++ b/pyscf/pbc/dft/kroks.py @@ -61,25 +61,13 @@ def dump_flags(self, verbose=None): def to_hf(self): '''Convert to KROHF object.''' - from pyscf.pbc import scf - return self._transfer_attrs_(scf.KROHF(self.cell, self.kpts)) + from pyscf.pbc import scf, df + out = self._transfer_attrs_(scf.KROHF(self.cell, self.kpts)) + # Pure functionals only construct J-type integrals. Enable all integrals for KHF. + if (not self._numint.libxc.is_hybrid_xc(self.xc) and + len(self.kpts) > 1 and getattr(self.with_df, '_j_only', False)): + out.with_df._j_only = False + out.with_df.reset() + return out to_gpu = lib.to_gpu - - -if __name__ == '__main__': - from pyscf.pbc import gto - cell = gto.Cell() - cell.unit = 'A' - cell.atom = 'C 0., 0., 0.; C 0.8917, 0.8917, 0.8917' - cell.a = '''0. 1.7834 1.7834 - 1.7834 0. 1.7834 - 1.7834 1.7834 0. ''' - - cell.basis = 'gth-szv' - cell.pseudo = 'gth-pade' - cell.verbose = 7 - cell.output = '/dev/null' - cell.build() - mf = KROKS(cell, cell.make_kpts([2,1,1])) - print(mf.kernel()) diff --git a/pyscf/pbc/dft/kuks.py b/pyscf/pbc/dft/kuks.py index ac66f973bc..732adcfa32 100644 --- a/pyscf/pbc/dft/kuks.py +++ b/pyscf/pbc/dft/kuks.py @@ -157,25 +157,13 @@ def nuc_grad_method(self): def to_hf(self): '''Convert to KUHF object.''' - from pyscf.pbc import scf - return self._transfer_attrs_(scf.KUHF(self.cell, self.kpts)) + from pyscf.pbc import scf, df + out = self._transfer_attrs_(scf.KUHF(self.cell, self.kpts)) + # Pure functionals only construct J-type integrals. Enable all integrals for KHF. 
+ if (not self._numint.libxc.is_hybrid_xc(self.xc) and + len(self.kpts) > 1 and getattr(self.with_df, '_j_only', False)): + out.with_df._j_only = False + out.with_df.reset() + return out to_gpu = lib.to_gpu - - -if __name__ == '__main__': - from pyscf.pbc import gto - cell = gto.Cell() - cell.unit = 'A' - cell.atom = 'C 0., 0., 0.; C 0.8917, 0.8917, 0.8917' - cell.a = '''0. 1.7834 1.7834 - 1.7834 0. 1.7834 - 1.7834 1.7834 0. ''' - - cell.basis = 'gth-szv' - cell.pseudo = 'gth-pade' - cell.verbose = 7 - cell.output = '/dev/null' - cell.build() - mf = KUKS(cell, cell.make_kpts([2,1,1])) - print(mf.kernel()) diff --git a/pyscf/pbc/dft/test/test_kgks.py b/pyscf/pbc/dft/test/test_kgks.py index a4fba351c4..6ebd1165ea 100644 --- a/pyscf/pbc/dft/test/test_kgks.py +++ b/pyscf/pbc/dft/test/test_kgks.py @@ -22,6 +22,7 @@ from pyscf import lib from pyscf.pbc import gto as gto from pyscf.pbc import dft as dft +from pyscf.pbc import scf as pbcscf from pyscf.pbc.df import rsdf_builder, gdf_builder try: import mcfun @@ -189,6 +190,19 @@ def test_mcol_x2c_kgks_lda(self): mf.run() self.assertAlmostEqual(mf.e_tot, -1.4910121442258883, 6) + def test_to_hf(self): + mf = dft.KGKS(cell).density_fit() + mf.with_df._j_only = True + a_hf = mf.to_hf() + self.assertTrue(a_hf.with_df._j_only) + self.assertTrue(isinstance(a_hf, pbcscf.kghf.KGHF)) + + mf = dft.KGKS(cell, kpts=cell.make_kpts([2,1,1])).density_fit() + mf.with_df._j_only = True + a_hf = mf.to_hf() + self.assertTrue(not a_hf.with_df._j_only) + self.assertTrue(isinstance(a_hf, pbcscf.kghf.KGHF)) + if __name__ == '__main__': print("Full Tests for pbc.dft.kgks") diff --git a/pyscf/pbc/dft/test/test_krks.py b/pyscf/pbc/dft/test/test_krks.py index 090711ec54..e8418cb972 100644 --- a/pyscf/pbc/dft/test/test_krks.py +++ b/pyscf/pbc/dft/test/test_krks.py @@ -22,6 +22,7 @@ from pyscf.pbc import gto as pbcgto from pyscf.pbc import dft as pbcdft +from pyscf.pbc import scf as pbcscf def build_cell(mesh): @@ -143,6 +144,31 @@ def 
test_rsh_df(self): mf.kernel() self.assertAlmostEqual(mf.e_tot, -2.4766238116030683, 5) + def test_to_hf(self): + mf = pbcdft.KRKS(cell).density_fit() + mf.with_df._j_only = True + a_hf = mf.to_hf() + self.assertTrue(a_hf.with_df._j_only) + self.assertTrue(isinstance(a_hf, pbcscf.khf.KRHF)) + + mf = pbcdft.KRKS(cell, kpts=cell.make_kpts([2,1,1])).density_fit() + mf.with_df._j_only = True + a_hf = mf.to_hf() + self.assertTrue(not a_hf.with_df._j_only) + self.assertTrue(isinstance(a_hf, pbcscf.khf.KRHF)) + + mf = pbcdft.KROKS(cell).density_fit() + mf.with_df._j_only = True + a_hf = mf.to_hf() + self.assertTrue(a_hf.with_df._j_only) + self.assertTrue(isinstance(a_hf, pbcscf.krohf.KROHF)) + + mf = pbcdft.KROKS(cell, kpts=cell.make_kpts([2,1,1])).density_fit() + mf.with_df._j_only = True + a_hf = mf.to_hf() + self.assertTrue(not a_hf.with_df._j_only) + self.assertTrue(isinstance(a_hf, pbcscf.krohf.KROHF)) + # TODO: test the reset method of pbcdft.KRKS, pbcdft.RKS whether the reset # methods of all subsequent objects are called diff --git a/pyscf/pbc/dft/test/test_kuks.py b/pyscf/pbc/dft/test/test_kuks.py index d99fddf7fa..a81ae3051f 100644 --- a/pyscf/pbc/dft/test/test_kuks.py +++ b/pyscf/pbc/dft/test/test_kuks.py @@ -19,6 +19,7 @@ import unittest import numpy as np from pyscf.pbc import gto as pbcgto +from pyscf.pbc import scf as pbcscf from pyscf.pbc import dft as pbcdft @@ -91,6 +92,19 @@ def test_rsh_df(self): mf.kernel() self.assertAlmostEqual(mf.e_tot, -2.4766238116030683, 7) + def test_to_hf(self): + mf = pbcdft.KUKS(cell).density_fit() + mf.with_df._j_only = True + a_hf = mf.to_hf() + self.assertTrue(a_hf.with_df._j_only) + self.assertTrue(isinstance(a_hf, pbcscf.kuhf.KUHF)) + + mf = pbcdft.KUKS(cell, kpts=cell.make_kpts([2,1,1])).density_fit() + mf.with_df._j_only = True + a_hf = mf.to_hf() + self.assertTrue(not a_hf.with_df._j_only) + self.assertTrue(isinstance(a_hf, pbcscf.kuhf.KUHF)) + if __name__ == '__main__': print("Full Tests for pbc.dft.kuks") From 
25a24fed24229a250cb2b0fd9c571666bfc010ce Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Sat, 13 Apr 2024 23:11:57 -0700 Subject: [PATCH 38/44] ancient GCC compatibility --- pyscf/lib/dft/utils.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pyscf/lib/dft/utils.c b/pyscf/lib/dft/utils.c index 04ef8e5b2f..fc27b4a48f 100644 --- a/pyscf/lib/dft/utils.c +++ b/pyscf/lib/dft/utils.c @@ -52,11 +52,20 @@ void get_gga_vrho_gs(double complex *out, double complex *vrho_gs, double comple int i; int ngrid2 = 2 * ngrid; double complex fac = -2. * _Complex_I; - #pragma omp parallel for simd schedule(static) +#pragma omp parallel +{ + double complex v; +// ensure OpenMP 4.0 +#if defined _OPENMP && _OPENMP >= 201307 + #pragma omp for simd schedule(static) +#else + #pragma omp for schedule(static) +#endif for (i = 0; i < ngrid; i++) { - out[i] = ( Gv[i*3] * vsigma1_gs[i] - +Gv[i*3+1] * vsigma1_gs[i+ngrid] - +Gv[i*3+2] * vsigma1_gs[i+ngrid2]) * fac + vrho_gs[i]; - out[i] *= weight; + v = ( Gv[i*3] * vsigma1_gs[i] + +Gv[i*3+1] * vsigma1_gs[i+ngrid] + +Gv[i*3+2] * vsigma1_gs[i+ngrid2]) * fac + vrho_gs[i]; + out[i] = v * weight; } } +} From 6d3b24bb64e2a5edb7990b6e3304068981a33f54 Mon Sep 17 00:00:00 2001 From: Matthew Hennefarth Date: Mon, 15 Apr 2024 13:42:13 -0500 Subject: [PATCH 39/44] fix --- pyscf/df/grad/casdm2_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscf/df/grad/casdm2_util.py b/pyscf/df/grad/casdm2_util.py index 6149e9eb51..4f6a5efbb0 100644 --- a/pyscf/df/grad/casdm2_util.py +++ b/pyscf/df/grad/casdm2_util.py @@ -476,7 +476,7 @@ def grad_elec_dferi (mc_grad, mo_cas=None, ci=None, dfcasdm2=None, casdm2=None, aoslices = mol.aoslice_by_atom () dE = np.array ([dE[:,p0:p1].sum (axis=1) for p0, p1 in aoslices[:,2:]]).transpose (1,0,2) - return np.ascontiguousarray (dE) + return np.ascontiguousarray (dE)[:,atmlst,:] if __name__ == '__main__': from pyscf.tools import molden From 
7d3caf1e14d05f0e2a9bfaf904507e91a30b4034 Mon Sep 17 00:00:00 2001 From: Zhenyu Zhu ajz34 Date: Mon, 29 Apr 2024 05:16:18 +0800 Subject: [PATCH 40/44] fix: infinite recursion atom_hf call when ECP with super-heavy atoms (>Cm) (#2183) * bugfix: resolve recursive initial guess when atomic charge > 96 (> Cm) and ecp activated * Adjust AGF2 tests --------- Co-authored-by: Qiming Sun --- pyscf/agf2/test/test_c_agf2.py | 16 ++++++++-------- pyscf/scf/atom_hf.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pyscf/agf2/test/test_c_agf2.py b/pyscf/agf2/test/test_c_agf2.py index b77551aa96..54325599ff 100644 --- a/pyscf/agf2/test/test_c_agf2.py +++ b/pyscf/agf2/test/test_c_agf2.py @@ -42,8 +42,8 @@ def test_c_ragf2(self): gf_vir = aux.GreensFunction(np.random.random(self.nvir), np.eye(self.nmo, self.nvir)) vv1, vev1 = _agf2.build_mats_ragf2_outcore(xija, gf_occ.energy, gf_vir.energy) vv2, vev2 = _agf2.build_mats_ragf2_incore(xija, gf_occ.energy, gf_vir.energy) - self.assertAlmostEqual(np.max(np.absolute(vv1-vv2)), 0.0, 10) - self.assertAlmostEqual(np.max(np.absolute(vev1-vev2)), 0.0, 10) + self.assertAlmostEqual(np.max(np.absolute(vv1-vv2)), 0.0, 8) + self.assertAlmostEqual(np.max(np.absolute(vev1-vev2)), 0.0, 8) def test_c_dfragf2(self): qxi = np.random.random((self.naux, self.nmo*self.nocc)) / self.naux @@ -52,8 +52,8 @@ def test_c_dfragf2(self): gf_vir = aux.GreensFunction(np.random.random(self.nvir), np.eye(self.nmo, self.nvir)) vv1, vev1 = _agf2.build_mats_dfragf2_outcore(qxi, qja, gf_occ.energy, gf_vir.energy) vv2, vev2 = _agf2.build_mats_dfragf2_incore(qxi, qja, gf_occ.energy, gf_vir.energy) - self.assertAlmostEqual(np.max(np.absolute(vv1-vv2)), 0.0, 10) - self.assertAlmostEqual(np.max(np.absolute(vev1-vev2)), 0.0, 10) + self.assertAlmostEqual(np.max(np.absolute(vv1-vv2)), 0.0, 8) + self.assertAlmostEqual(np.max(np.absolute(vev1-vev2)), 0.0, 8) def test_c_uagf2(self): xija = np.random.random((2, self.nmo, self.nocc, self.nocc, self.nvir)) @@ 
-63,8 +63,8 @@ def test_c_uagf2(self): aux.GreensFunction(np.random.random(self.nvir), np.eye(self.nmo, self.nvir))) vv1, vev1 = _agf2.build_mats_uagf2_outcore(xija, (gf_occ[0].energy, gf_occ[1].energy), (gf_vir[0].energy, gf_vir[1].energy)) vv2, vev2 = _agf2.build_mats_uagf2_incore(xija, (gf_occ[0].energy, gf_occ[1].energy), (gf_vir[0].energy, gf_vir[1].energy)) - self.assertAlmostEqual(np.max(np.absolute(vv1-vv2)), 0.0, 10) - self.assertAlmostEqual(np.max(np.absolute(vev1-vev2)), 0.0, 10) + self.assertAlmostEqual(np.max(np.absolute(vv1-vv2)), 0.0, 8) + self.assertAlmostEqual(np.max(np.absolute(vev1-vev2)), 0.0, 8) def test_c_dfuagf2(self): qxi = np.random.random((2, self.naux, self.nmo*self.nocc)) / self.naux @@ -75,8 +75,8 @@ def test_c_dfuagf2(self): aux.GreensFunction(np.random.random(self.nvir), np.eye(self.nmo, self.nvir))) vv1, vev1 = _agf2.build_mats_dfuagf2_outcore(qxi, qja, (gf_occ[0].energy, gf_occ[1].energy), (gf_vir[0].energy, gf_vir[1].energy)) vv2, vev2 = _agf2.build_mats_dfuagf2_incore(qxi, qja, (gf_occ[0].energy, gf_occ[1].energy), (gf_vir[0].energy, gf_vir[1].energy)) - self.assertAlmostEqual(np.max(np.absolute(vv1-vv2)), 0.0, 10) - self.assertAlmostEqual(np.max(np.absolute(vev1-vev2)), 0.0, 10) + self.assertAlmostEqual(np.max(np.absolute(vv1-vv2)), 0.0, 8) + self.assertAlmostEqual(np.max(np.absolute(vev1-vev2)), 0.0, 8) if __name__ == '__main__': diff --git a/pyscf/scf/atom_hf.py b/pyscf/scf/atom_hf.py index 4430963493..56dca05b28 100644 --- a/pyscf/scf/atom_hf.py +++ b/pyscf/scf/atom_hf.py @@ -93,7 +93,7 @@ def __init__(self, mol): hf.SCF.__init__(self, mol) # The default initial guess minao does not have super-heavy elements - if mol.atom_charge(0) > 96: + if gto.charge(mol.atom_symbol(0)) > 96: self.init_guess = '1e' self = self.apply(addons.remove_linear_dep_) From 02469ebfd6e6137d7f69baf4461cc8d40c85f462 Mon Sep 17 00:00:00 2001 From: jeanwsr Date: Fri, 26 Apr 2024 18:34:22 +0800 Subject: [PATCH 41/44] fix x_id in parse_token --- 
pyscf/dft/libxc.py | 4 ++-- pyscf/dft/test/test_libxc.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pyscf/dft/libxc.py b/pyscf/dft/libxc.py index 6b2a841a52..2c9aec1ebe 100644 --- a/pyscf/dft/libxc.py +++ b/pyscf/dft/libxc.py @@ -1189,8 +1189,8 @@ def parse_token(token, ftype, search_xc_alias=False): else: # Some libxc functionals may not be listed in the # XC_CODES table. Query libxc directly - func_id = _itrf.xc_functional_get_number(ctypes.c_char_p(key.encode())) - if func_id == -1: + x_id = _itrf.xc_functional_get_number(ctypes.c_char_p(key.encode())) + if x_id == -1: raise KeyError(f"LibXCFunctional: name '{key}' not found.") if isinstance(x_id, str): hyb1, fn_facs1 = parse_xc(x_id) diff --git a/pyscf/dft/test/test_libxc.py b/pyscf/dft/test/test_libxc.py index f4223fdeec..373b81b119 100644 --- a/pyscf/dft/test/test_libxc.py +++ b/pyscf/dft/test/test_libxc.py @@ -123,6 +123,9 @@ def test_parse_xc(self): self.assertEqual(dft.libxc.parse_xc('Xpbe,')[1], ((123,1),)) self.assertEqual(dft.libxc.parse_xc('pbe,' )[1], ((101,1),)) + self.assertEqual(dft.libxc.parse_xc('gga_x_pbe_gaussian' )[1], ((321,1),)) + + hyb, fn_facs = dft.libxc.parse_xc('PBE*.4+LDA') self.assertEqual(fn_facs, ((101, 0.4), (130, 0.4), (1, 1))) self.assertRaises(KeyError, dft.libxc.parse_xc, 'PBE+VWN') From a343760d0b9bf0adc22c1d7daf5a9a3d4477bb8d Mon Sep 17 00:00:00 2001 From: Xiaojie Wu Date: Thu, 2 May 2024 22:31:46 -0700 Subject: [PATCH 42/44] fixing unit test for dftd3 and dftd4 --- pyscf/grad/test/test_rks.py | 14 ++++++++------ pyscf/grad/test/test_uks.py | 8 ++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pyscf/grad/test/test_rks.py b/pyscf/grad/test/test_rks.py index 456e11cdc4..cbee809485 100644 --- a/pyscf/grad/test/test_rks.py +++ b/pyscf/grad/test/test_rks.py @@ -194,9 +194,10 @@ def test_finite_diff_rks_grad(self): e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 
0.757 0.587')) self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) - def test_fnite_diff_rks_d3_grad(self): + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_finite_diff_rks_d3_grad(self): mol1 = mol.copy() - mf = dft.RKS(mol) + mf = dft.RKS(mol, xc='b3lyp') mf.conv_tol = 1e-14 mf.kernel() g = mf.nuc_grad_method().set(grid_response=True).kernel() @@ -206,9 +207,10 @@ def test_fnite_diff_rks_d3_grad(self): e2 = mf_scanner(mol1.set_geom_('O 0. 0. -.0001; 1 0. -0.757 0.587; 1 0. 0.757 0.587')) self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) - def test_fnite_diff_rks_d4_grad(self): + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_finite_diff_rks_d4_grad(self): mol1 = mol.copy() - mf = dft.RKS(mol) + mf = dft.RKS(mol, xc='b3lyp') mf.conv_tol = 1e-14 mf.kernel() g = mf.nuc_grad_method().set(grid_response=True).kernel() @@ -231,7 +233,7 @@ def test_finite_diff_df_rks_grad(self): @unittest.skipIf(dftd3 is None, "requires the dftd3 library") def test_finite_diff_df_rks_d3_grad(self): - mf1 = mf.density_fit () + mf1 = dft.RKS(mol, xc='b3lyp').density_fit () mf1.disp = 'd3bj' mf1.kernel() g = mf1.nuc_grad_method ().set (grid_response=True).kernel () @@ -244,7 +246,7 @@ def test_finite_diff_df_rks_d3_grad(self): @unittest.skipIf(dftd4 is None, "requires the dftd4 library") def test_finite_diff_df_rks_d4_grad(self): - mf1 = mf.density_fit () + mf1 = dft.RKS(mol, xc='b3lyp').density_fit () mf1.disp = 'd4' mf1.kernel() g = mf1.nuc_grad_method ().set (grid_response=True).kernel () diff --git a/pyscf/grad/test/test_uks.py b/pyscf/grad/test/test_uks.py index 9a763444ba..5a082a9621 100644 --- a/pyscf/grad/test/test_uks.py +++ b/pyscf/grad/test/test_uks.py @@ -78,9 +78,9 @@ def test_finite_diff_df_uks_grad(self): self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) @unittest.skipIf(dftd3 is None, "requires the dftd3 library") - def test_fnite_diff_uks_d3_grad(self): + def 
test_finite_diff_uks_d3_grad(self): mol1 = mol.copy() - mf = dft.UKS(mol) + mf = dft.UKS(mol, xc='b3lyp') mf.disp = 'd3bj' mf.conv_tol = 1e-14 mf.kernel() @@ -92,9 +92,9 @@ def test_fnite_diff_uks_d3_grad(self): self.assertAlmostEqual(g[0,2], (e1-e2)/2e-4*lib.param.BOHR, 6) @unittest.skipIf(dftd4 is None, "requires the dftd4 library") - def test_fnite_diff_uks_d4_grad(self): + def test_finite_diff_uks_d4_grad(self): mol1 = mol.copy() - mf = dft.UKS(mol) + mf = dft.UKS(mol, xc='b3lyp') mf.disp = 'd4' mf.conv_tol = 1e-14 mf.kernel() From 9a48cd1d29068c9051e154aed4bb965bde9a5197 Mon Sep 17 00:00:00 2001 From: Michal Krompiec Date: Fri, 3 May 2024 20:32:31 +0100 Subject: [PATCH 43/44] Run CASCI and CASSCF without symmetry if symmetry is C1 (#2195) * Run CASCI and CASSCF without symmetry if symmetry is C1 * groupname, not symmetry! --- pyscf/mcscf/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyscf/mcscf/__init__.py b/pyscf/mcscf/__init__.py index 82196ded33..db8a6e53bd 100644 --- a/pyscf/mcscf/__init__.py +++ b/pyscf/mcscf/__init__.py @@ -190,7 +190,7 @@ def CASSCF(mf_or_mol, ncas, nelecas, ncore=None, frozen=None): if isinstance(mf, _DFHF) and mf.with_df: return DFCASSCF(mf, ncas, nelecas, ncore, frozen) - if mf.mol.symmetry: + if mf.mol.symmetry and mf.mol.groupname != 'C1': mc = mc1step_symm.CASSCF(mf, ncas, nelecas, ncore, frozen) else: mc = mc1step.CASSCF(mf, ncas, nelecas, ncore, frozen) @@ -214,7 +214,7 @@ def CASCI(mf_or_mol, ncas, nelecas, ncore=None): if isinstance(mf, _DFHF) and mf.with_df: return DFCASCI(mf, ncas, nelecas, ncore) - if mf.mol.symmetry: + if mf.mol.symmetry and mf.mol.groupname != 'C1': mc = casci_symm.CASCI(mf, ncas, nelecas, ncore) else: mc = casci.CASCI(mf, ncas, nelecas, ncore) @@ -279,7 +279,7 @@ def DFCASSCF(mf_or_mol, ncas, nelecas, auxbasis=None, ncore=None, if isinstance(mf, scf.uhf.UHF): mf = mf.to_rhf() - if mf.mol.symmetry: + if mf.mol.symmetry and mf.mol.groupname != 'C1': mc = 
mc1step_symm.CASSCF(mf, ncas, nelecas, ncore, frozen) else: mc = mc1step.CASSCF(mf, ncas, nelecas, ncore, frozen) @@ -296,7 +296,7 @@ def DFCASCI(mf_or_mol, ncas, nelecas, auxbasis=None, ncore=None): if isinstance(mf, scf.uhf.UHF): mf = mf.to_rhf() - if mf.mol.symmetry: + if mf.mol.symmetry and mf.mol.groupname != 'C1': mc = casci_symm.CASCI(mf, ncas, nelecas, ncore) else: mc = casci.CASCI(mf, ncas, nelecas, ncore) From 940e4ac16f02eeef3fc944eae190d7f0609a60e7 Mon Sep 17 00:00:00 2001 From: Xiaojie Wu Date: Fri, 3 May 2024 23:42:36 -0700 Subject: [PATCH 44/44] move auxbasis_response out of __init__ (#2192) * move auxbasis_response out of __init__ * relax unit test in test_c_agf2 * import hessian * Update __init__.py --- pyscf/df/grad/rhf.py | 7 ++++--- pyscf/df/grad/rks.py | 7 ++++--- pyscf/df/grad/uhf.py | 7 ++++--- pyscf/df/grad/uks.py | 6 +++--- pyscf/df/hessian/rhf.py | 2 +- pyscf/df/hessian/rks.py | 2 +- pyscf/df/hessian/uhf.py | 2 +- pyscf/df/hessian/uks.py | 2 +- 8 files changed, 19 insertions(+), 16 deletions(-) diff --git a/pyscf/df/grad/rhf.py b/pyscf/df/grad/rhf.py index cfa31375eb..94aad0be24 100644 --- a/pyscf/df/grad/rhf.py +++ b/pyscf/df/grad/rhf.py @@ -482,11 +482,12 @@ class Gradients(rhf_grad.Gradients): _keys = {'with_df', 'auxbasis_response'} def __init__(self, mf): - # Whether to include the response of DF auxiliary basis when computing - # nuclear gradients of J/K matrices - self.auxbasis_response = True rhf_grad.Gradients.__init__(self, mf) + # Whether to include the response of DF auxiliary basis when computing + # nuclear gradients of J/K matrices + auxbasis_response = True + def check_sanity(self): assert isinstance(self.base, df.df_jk._DFHF) diff --git a/pyscf/df/grad/rks.py b/pyscf/df/grad/rks.py index adfc7b7080..1802299b68 100644 --- a/pyscf/df/grad/rks.py +++ b/pyscf/df/grad/rks.py @@ -107,11 +107,12 @@ class Gradients(rks_grad.Gradients): _keys = {'with_df', 'auxbasis_response'} def __init__(self, mf): - # Whether to include the 
response of DF auxiliary basis when computing - # nuclear gradients of J/K matrices - self.auxbasis_response = True rks_grad.Gradients.__init__(self, mf) + # Whether to include the response of DF auxiliary basis when computing + # nuclear gradients of J/K matrices + auxbasis_response = True + get_jk = df_rhf_grad.Gradients.get_jk get_j = df_rhf_grad.Gradients.get_j get_k = df_rhf_grad.Gradients.get_k diff --git a/pyscf/df/grad/uhf.py b/pyscf/df/grad/uhf.py index 0eec773b0d..6295633ede 100644 --- a/pyscf/df/grad/uhf.py +++ b/pyscf/df/grad/uhf.py @@ -35,11 +35,12 @@ class Gradients(uhf_grad.Gradients): _keys = {'with_df', 'auxbasis_response'} def __init__(self, mf): - # Whether to include the response of DF auxiliary basis when computing - # nuclear gradients of J/K matrices - self.auxbasis_response = True uhf_grad.Gradients.__init__(self, mf) + # Whether to include the response of DF auxiliary basis when computing + # nuclear gradients of J/K matrices + auxbasis_response = True + get_jk = df_rhf_grad.Gradients.get_jk get_j = df_rhf_grad.Gradients.get_j get_k = df_rhf_grad.Gradients.get_k diff --git a/pyscf/df/grad/uks.py b/pyscf/df/grad/uks.py index e6de663a95..30fe60b361 100644 --- a/pyscf/df/grad/uks.py +++ b/pyscf/df/grad/uks.py @@ -108,11 +108,11 @@ class Gradients(uks_grad.Gradients): _keys = {'with_df', 'auxbasis_response'} def __init__(self, mf): - # Whether to include the response of DF auxiliary basis when computing - # nuclear gradients of J/K matrices - self.auxbasis_response = True uks_grad.Gradients.__init__(self, mf) + # Whether to include the response of DF auxiliary basis when computing + # nuclear gradients of J/K matrices + auxbasis_response = True get_jk = df_rhf_grad.Gradients.get_jk get_j = df_rhf_grad.Gradients.get_j get_k = df_rhf_grad.Gradients.get_k diff --git a/pyscf/df/hessian/rhf.py b/pyscf/df/hessian/rhf.py index d06fa9f473..6c4cd691e5 100644 --- a/pyscf/df/hessian/rhf.py +++ b/pyscf/df/hessian/rhf.py @@ -475,9 +475,9 @@ def 
_load_dim0(dat, p0, p1): class Hessian(rhf_hess.Hessian): '''Non-relativistic restricted Hartree-Fock hessian''' def __init__(self, mf): - self.auxbasis_response = 1 rhf_hess.Hessian.__init__(self, mf) + auxbasis_response = 1 partial_hess_elec = partial_hess_elec make_h1 = make_h1 diff --git a/pyscf/df/hessian/rks.py b/pyscf/df/hessian/rks.py index 74c1bdd6c9..1d1073d657 100644 --- a/pyscf/df/hessian/rks.py +++ b/pyscf/df/hessian/rks.py @@ -121,9 +121,9 @@ def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): class Hessian(rks_hess.Hessian): '''Non-relativistic RKS hessian''' def __init__(self, mf): - self.auxbasis_response = 1 rks_hess.Hessian.__init__(self, mf) + auxbasis_response = 1 partial_hess_elec = partial_hess_elec make_h1 = make_h1 diff --git a/pyscf/df/hessian/uhf.py b/pyscf/df/hessian/uhf.py index 5cb20240f8..be863e87ec 100644 --- a/pyscf/df/hessian/uhf.py +++ b/pyscf/df/hessian/uhf.py @@ -526,9 +526,9 @@ def _gen_jk(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, class Hessian(uhf_hess.Hessian): '''Non-relativistic UHF hessian''' def __init__(self, mf): - self.auxbasis_response = 1 uhf_hess.Hessian.__init__(self, mf) + auxbasis_response = 1 partial_hess_elec = partial_hess_elec make_h1 = make_h1 diff --git a/pyscf/df/hessian/uks.py b/pyscf/df/hessian/uks.py index 1afa995973..3c86207f36 100644 --- a/pyscf/df/hessian/uks.py +++ b/pyscf/df/hessian/uks.py @@ -134,9 +134,9 @@ def make_h1(hessobj, mo_coeff, mo_occ, chkfile=None, atmlst=None, verbose=None): class Hessian(uks_hess.Hessian): '''Non-relativistic RKS hessian''' def __init__(self, mf): - self.auxbasis_response = 1 uks_hess.Hessian.__init__(self, mf) + auxbasis_response = 1 partial_hess_elec = partial_hess_elec make_h1 = make_h1