Skip to content

Commit 3f3d3bb

Browse files
authored
Merge pull request #52 from lcmd-epfl/add-orca-gbw
Add orca gbw reader
2 parents 654d11b + 2456aaf commit 3f3d3bb

11 files changed

+2506
-7
lines changed

qstack/compound.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,17 @@ def xyz_to_mol(fin, basis="def2-svp", charge=0, spin=0, ignore=False, unit='ANG'
1818
basis (str or dict): Basis set.
1919
charge (int): Charge of the molecule.
2020
spin (int): Spin of the molecule (alpha electrons - beta electrons).
21+
ignore (bool): If assume molecule closed-shell an assign charge either 0 or -1
22+
unit (str): units (Ang or Bohr)
23+
ecp (str) : ECP to use
2124
2225
Returns:
2326
A pyscf Mole object containing the molecule information.
2427
"""
2528

2629
# Open and read the file
27-
f = open(fin, "r")
28-
molxyz = "\n".join(f.read().split("\n")[2:])
29-
f.close()
30+
with open(fin, "r") as f:
31+
molxyz = "\n".join(f.read().split("\n")[2:])
3032

3133
# Define attributes to the Mole object and build it
3234
mol = gto.Mole()

qstack/orcaio.py

+226-2
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,62 @@
1+
import warnings
2+
import struct
13
import numpy as np
4+
import pyscf
25
from qstack.mathutils.matrix import from_tril
36
from qstack.tools import reorder_ao
47

8+
9+
def read_input(fname, basis, ecp=None):
10+
"""Read the structure from an Orca input (XYZ coordinates in simple format only)
11+
12+
Note: we do not read basis set info from the file.
13+
TODO: read also %coords block?
14+
15+
Args:
16+
fname (str) : path to file
17+
basis (str/dict) : basis name, path to file, or dict in the pyscf format
18+
Kwargs:
19+
ecp (str) : ECP to use
20+
21+
Returns:
22+
pyscf Mole object.
23+
"""
24+
25+
with open(fname, "r") as f:
26+
lines = [x.strip() for x in f.readlines()]
27+
28+
command_line = '\n'.join(y[1:] for y in filter(lambda x: x.startswith('!'), lines)).lower().split()
29+
if 'bohrs' in command_line:
30+
unit = 'Bohr'
31+
else:
32+
unit = 'Angstrom'
33+
34+
idx_xyz_0, idx_xyz_1 = [y[0] for y in filter(lambda x: x[1].startswith('*'), enumerate(lines))][:2]
35+
charge, mult = map(int, lines[idx_xyz_0][1:].split()[1:])
36+
molxyz = '\n'.join(lines[idx_xyz_0+1:idx_xyz_1])
37+
38+
mol = pyscf.gto.Mole()
39+
mol.atom = molxyz
40+
mol.charge = charge
41+
mol.spin = mult-1
42+
mol.unit = unit
43+
mol.basis = basis
44+
mol.ecp = ecp
45+
mol.build()
46+
return mol
47+
48+
549
def read_density(mol, basename, directory='./', version=500, openshell=False, reorder_dest='pyscf'):
6-
"""Reads densities from an ORCA output.
50+
"""Read densities from an ORCA output.
51+
52+
Tested on Orca versions 4.0, 4.2, and 5.0.
753
854
Args:
955
mol (pyscf Mole): pyscf Mole object.
1056
basename (str): Job name (without extension).
11-
version (int): ORCA version.
57+
Kwargs:
58+
directory (str) : path to the directory with the density files.
59+
version (int): ORCA version (400 for 4.0, 421 for 4.2, 500 for 5.0).
1260
openshell (bool): If read spin density in addition to the electron density.
1361
reorder_dest (str): Which AO ordering convention to use.
1462
@@ -38,8 +86,184 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re
3886
else:
3987
dm = np.fromfile(path[0], offset=8, count=mol.nao*mol.nao*nspin).reshape((nspin,mol.nao,mol.nao))
4088

89+
90+
is_def2 = 'def2' in pyscf.gto.basis._format_basis_name(mol.basis)
91+
has_3d = np.any([21 <= pyscf.data.elements.charge(q) <= 30 for q in mol.elements])
92+
if is_def2 and has_3d:
93+
msg = f'\n{path}:\nBasis set is not sorted wrt angular momenta for 3d elements.\nBetter use a gbw file.'
94+
warnings.warn(msg)
95+
if reorder_dest is not None:
96+
msg = f'\nDensity matrix reordering for ORCA to {reorder_dest} is compromised.'
97+
raise RuntimeError(msg)
98+
4199
if reorder_dest is not None:
42100
dm = np.array([reorder_ao(mol, i, src='orca', dest=reorder_dest) for i in dm])
43101

44102
dm = np.squeeze(dm)
45103
return dm
104+
105+
106+
def _parse_gbw(fname):
107+
""" Parse ORCA .gbw files.
108+
109+
Many thanks to
110+
https://pysisyphus.readthedocs.io/en/latest/_modules/pysisyphus/calculators/ORCA.html
111+
112+
Args:
113+
fname (str): path to the gbw file.
114+
115+
Returns:
116+
numpy 3darray of (s,nao,nao) containing the density matrix
117+
numpy 2darray of (s,nao) containing the MO energies
118+
numpy 2darray of (s,nao) containing the MO occupation numbers
119+
dict of {int : [int]} with a list of basis functions angular momenta
120+
for each atom (not for element!)
121+
"""
122+
123+
def read_array(f, n, dtype):
124+
return np.frombuffer(f.read(dtype().itemsize * n), dtype=dtype)
125+
126+
def read_mos():
127+
f.seek(24)
128+
offset = struct.unpack("<q", f.read(np.int64().itemsize))[0] # int64: pointer to orbitals
129+
f.seek(offset)
130+
sets = struct.unpack("<i", f.read(np.int32().itemsize))[0] # int32: number of MO sets
131+
nao = struct.unpack("<i", f.read(np.int32().itemsize))[0] # int32: number of orbitals
132+
assert sets in (1,2)
133+
134+
coefficients_ab = []
135+
energies_ab = []
136+
occupations_ab = []
137+
138+
for i in range(sets):
139+
coefficients = read_array(f, nao*nao, np.float64).reshape(-1, nao)
140+
occupations = read_array(f, nao, np.float64)
141+
energies = read_array(f, nao, np.float64)
142+
irreps = read_array(f, nao, np.int32)
143+
cores = read_array(f, nao, np.int32)
144+
coefficients_ab.append(coefficients)
145+
energies_ab.append(energies)
146+
occupations_ab.append(occupations)
147+
148+
coefficients_ab = np.array(coefficients_ab)
149+
energies_ab = np.array(energies_ab)
150+
occupations_ab = np.array(occupations_ab)
151+
return coefficients_ab, energies_ab, occupations_ab
152+
153+
def read_basis(MAX_PRIMITIVES=37):
154+
f.seek(16)
155+
offset = struct.unpack("<q", f.read(np.int64().itemsize))[0]
156+
f.seek(offset)
157+
_, nat = struct.unpack("<2i", f.read(2 * np.int32().itemsize))
158+
ls = {}
159+
for at in range(nat):
160+
ls[at] = []
161+
_, nao_at = struct.unpack("<2i", f.read(2 * np.int32().itemsize))
162+
for iao in range(nao_at):
163+
l, _, ngto, _ = struct.unpack("<4i", f.read(4 * np.int32().itemsize))
164+
a = read_array(f, MAX_PRIMITIVES, np.float64) # exponents
165+
c = read_array(f, MAX_PRIMITIVES, np.float64) # coefficients
166+
ls[at].append(l)
167+
return ls
168+
169+
with open(fname, "rb") as f:
170+
coefficients_ab, energies_ab, occupations_ab = read_mos()
171+
try:
172+
ls = read_basis()
173+
except: # Orca version 4.0
174+
ls = {}
175+
return coefficients_ab, energies_ab, occupations_ab, ls
176+
177+
178+
def _get_indices(mol, ls_from_orca):
179+
""" Get coefficient needed to reorder the AO read from Orca.
180+
181+
Args:
182+
mol (pyscf Mole): pyscf Mole object.
183+
ls_from_orca : dict of {int : [int]} with a list of basis functions
184+
angular momenta for those atoms (not elements!)
185+
whose basis functions are *not* sorted wrt to angular momenta.
186+
The lists represent the Orca order.
187+
188+
Returns:
189+
numpy int 1darray of (nao,) containing the indices to be used as
190+
c_reordered = c_orca[indices]
191+
"""
192+
if ls_from_orca is None:
193+
return None
194+
ao_limits = mol.offset_ao_by_atom()[:,2:]
195+
indices_full = np.arange(mol.nao)
196+
for iat, ls in ls_from_orca.items():
197+
indices = []
198+
i = 0
199+
for il, l in enumerate(ls):
200+
indices.append((l, il, i + np.arange(2*l+1)))
201+
i += 2*l+1
202+
indices = sorted(indices, key=lambda x: (x[0], x[1]))
203+
indices = np.array([j for i in indices for j in i[2]])
204+
atom_slice = np.s_[ao_limits[iat][0]:ao_limits[iat][1]]
205+
indices_full[atom_slice] = indices[:] + ao_limits[iat][0]
206+
assert np.all(sorted(indices_full)==np.arange(mol.nao))
207+
return indices_full
208+
209+
210+
def reorder_coeff_inplace(c_full, mol, reorder_dest='pyscf', ls_from_orca=None):
211+
""" Reorder coefficient read from ORCA .gbw
212+
213+
Args:
214+
c_full : numpy 3darray of (s,nao,nao) containing the MO coefficients
215+
to reorder
216+
mol (pyscf Mole): pyscf Mole object.
217+
Kwargs:
218+
reorder_dest (str): Which AO ordering convention to use.
219+
ls_from_orca : dict of {int : [int]} with a list of basis functions
220+
angular momenta for those atoms (not elements!)
221+
whose basis functions are *not* sorted wrt to angular momenta.
222+
The lists represent the Orca order.
223+
"""
224+
def _reorder_coeff(c):
225+
# In ORCA, at least def2-SVP and def2-TZVP for 3d metals
226+
# are not sorted wrt angular momenta
227+
idx = _get_indices(mol, ls_from_orca)
228+
for i in range(len(c)):
229+
if idx is not None:
230+
c[:,i] = c[idx,i]
231+
c[:,i] = reorder_ao(mol, c[:,i], src='orca', dest=reorder_dest)
232+
for i in range(c_full.shape[0]):
233+
_reorder_coeff(c_full[i])
234+
235+
236+
def read_gbw(mol, fname, reorder_dest='pyscf', sort_l=True):
237+
"""Read orbitals from an ORCA output.
238+
239+
Tested on Orca versions 4.2 and 5.0.
240+
Limited for Orca version 4.0 (cannot read the basis set).
241+
242+
Args:
243+
mol (pyscf Mole): pyscf Mole object.
244+
fname (str): path to the gbw file.
245+
Kwargs:
246+
reorder_dest (str): Which AO ordering convention to use.
247+
sort_l (bool): if sort the basis functions wrt angular momenta.
248+
e.g. PySCF requires them sorted.
249+
250+
Returns:
251+
numpy 3darray of (s,nao,nao) containing the MO coefficients
252+
numpy 2darray of (s,nao) containing the MO energies
253+
numpy 2darray of (s,nao) containing the MO occupation numbers
254+
s is 1 for closed-shell and 2 for open-shell computation.
255+
nao is number of atomic/molecular orbitals.
256+
"""
257+
c, e, occ, ls = _parse_gbw(fname)
258+
if not ls and sort_l:
259+
raise RuntimeError(f'{fname}: basis set information not found. Cannot check if the basis set is sorted wrt angular momenta. Put sort_l=False to ignore')
260+
261+
ls = {iat: lsiat for iat, lsiat in ls.items() if np.any(lsiat!=sorted(lsiat))}
262+
if ls and not sort_l:
263+
msg = f'\n{fname}: basis set is not sorted wrt angular momenta for atoms # {list(ls.keys())} and is kept as is'
264+
warnings.warn(msg)
265+
266+
if reorder_dest is not None:
267+
reorder_coeff_inplace(c, mol, reorder_dest, ls if (ls and sort_l) else None)
268+
return c, e, occ
269+

tests/data/orca/CEHZOF/CEHZOF.xyz

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
26
2+
0 1
3+
I -0.691564 6.065851 10.218679
4+
Co -0.896730 6.291743 12.801124
5+
S -3.111547 5.942715 12.672839
6+
C -0.900957 4.763642 14.175690
7+
C -0.461276 5.983590 14.795995
8+
C 0.713137 6.405881 14.113956
9+
H 1.264840 7.323863 14.311032
10+
C 0.993227 5.481683 13.083596
11+
H 1.802106 5.559932 12.358853
12+
C 0.015965 4.473016 13.122433
13+
H -0.056529 3.635907 12.428292
14+
C -2.210760 4.059666 14.447381
15+
H -2.673227 4.493474 15.348774
16+
H -2.035736 2.991419 14.671175
17+
C -3.111610 4.202498 13.259335
18+
H -2.727972 3.649740 12.386330
19+
H -4.146425 3.867362 13.435280
20+
C -3.945153 6.793732 14.050514
21+
H -3.869637 7.852090 13.755824
22+
H -3.340371 6.686944 14.965041
23+
C -5.368941 6.350425 14.251741
24+
H -5.442055 5.313038 14.619953
25+
H -5.857396 6.994746 15.002527
26+
H -5.951474 6.421791 13.318039
27+
I -1.173397 8.852094 12.837558
28+
H -0.948251 6.502776 15.620742
2.22 MB
Binary file not shown.
3.15 MB
Binary file not shown.

0 commit comments

Comments
 (0)