|
| 1 | +import warnings |
| 2 | +import struct |
1 | 3 | import numpy as np
|
| 4 | +import pyscf |
2 | 5 | from qstack.mathutils.matrix import from_tril
|
3 | 6 | from qstack.tools import reorder_ao
|
4 | 7 |
|
| 8 | + |
| 9 | +def read_input(fname, basis, ecp=None): |
| 10 | + """Read the structure from an Orca input (XYZ coordinates in simple format only) |
| 11 | +
|
| 12 | + Note: we do not read basis set info from the file. |
| 13 | + TODO: read also %coords block? |
| 14 | +
|
| 15 | + Args: |
| 16 | + fname (str) : path to file |
| 17 | + basis (str/dict) : basis name, path to file, or dict in the pyscf format |
| 18 | + Kwargs: |
| 19 | + ecp (str) : ECP to use |
| 20 | +
|
| 21 | + Returns: |
| 22 | + pyscf Mole object. |
| 23 | + """ |
| 24 | + |
| 25 | + with open(fname, "r") as f: |
| 26 | + lines = [x.strip() for x in f.readlines()] |
| 27 | + |
| 28 | + command_line = '\n'.join(y[1:] for y in filter(lambda x: x.startswith('!'), lines)).lower().split() |
| 29 | + if 'bohrs' in command_line: |
| 30 | + unit = 'Bohr' |
| 31 | + else: |
| 32 | + unit = 'Angstrom' |
| 33 | + |
| 34 | + idx_xyz_0, idx_xyz_1 = [y[0] for y in filter(lambda x: x[1].startswith('*'), enumerate(lines))][:2] |
| 35 | + charge, mult = map(int, lines[idx_xyz_0][1:].split()[1:]) |
| 36 | + molxyz = '\n'.join(lines[idx_xyz_0+1:idx_xyz_1]) |
| 37 | + |
| 38 | + mol = pyscf.gto.Mole() |
| 39 | + mol.atom = molxyz |
| 40 | + mol.charge = charge |
| 41 | + mol.spin = mult-1 |
| 42 | + mol.unit = unit |
| 43 | + mol.basis = basis |
| 44 | + mol.ecp = ecp |
| 45 | + mol.build() |
| 46 | + return mol |
| 47 | + |
| 48 | + |
5 | 49 | def read_density(mol, basename, directory='./', version=500, openshell=False, reorder_dest='pyscf'):
|
6 |
| - """Reads densities from an ORCA output. |
| 50 | + """Read densities from an ORCA output. |
| 51 | +
|
| 52 | + Tested on Orca versions 4.0, 4.2, and 5.0. |
7 | 53 |
|
8 | 54 | Args:
|
9 | 55 | mol (pyscf Mole): pyscf Mole object.
|
10 | 56 | basename (str): Job name (without extension).
|
11 |
| - version (int): ORCA version. |
| 57 | + Kwargs: |
| 58 | + directory (str) : path to the directory with the density files. |
| 59 | + version (int): ORCA version (400 for 4.0, 421 for 4.2, 500 for 5.0). |
12 | 60 | openshell (bool): If read spin density in addition to the electron density.
|
13 | 61 | reorder_dest (str): Which AO ordering convention to use.
|
14 | 62 |
|
@@ -38,8 +86,184 @@ def read_density(mol, basename, directory='./', version=500, openshell=False, re
|
38 | 86 | else:
|
39 | 87 | dm = np.fromfile(path[0], offset=8, count=mol.nao*mol.nao*nspin).reshape((nspin,mol.nao,mol.nao))
|
40 | 88 |
|
| 89 | + |
| 90 | + is_def2 = 'def2' in pyscf.gto.basis._format_basis_name(mol.basis) |
| 91 | + has_3d = np.any([21 <= pyscf.data.elements.charge(q) <= 30 for q in mol.elements]) |
| 92 | + if is_def2 and has_3d: |
| 93 | + msg = f'\n{path}:\nBasis set is not sorted wrt angular momenta for 3d elements.\nBetter use a gbw file.' |
| 94 | + warnings.warn(msg) |
| 95 | + if reorder_dest is not None: |
| 96 | + msg = f'\nDensity matrix reordering for ORCA to {reorder_dest} is compromised.' |
| 97 | + raise RuntimeError(msg) |
| 98 | + |
41 | 99 | if reorder_dest is not None:
|
42 | 100 | dm = np.array([reorder_ao(mol, i, src='orca', dest=reorder_dest) for i in dm])
|
43 | 101 |
|
44 | 102 | dm = np.squeeze(dm)
|
45 | 103 | return dm
|
| 104 | + |
| 105 | + |
| 106 | +def _parse_gbw(fname): |
| 107 | + """ Parse ORCA .gbw files. |
| 108 | +
|
| 109 | + Many thanks to |
| 110 | + https://pysisyphus.readthedocs.io/en/latest/_modules/pysisyphus/calculators/ORCA.html |
| 111 | +
|
| 112 | + Args: |
| 113 | + fname (str): path to the gbw file. |
| 114 | +
|
| 115 | + Returns: |
| 116 | + numpy 3darray of (s,nao,nao) containing the density matrix |
| 117 | + numpy 2darray of (s,nao) containing the MO energies |
| 118 | + numpy 2darray of (s,nao) containing the MO occupation numbers |
| 119 | + dict of {int : [int]} with a list of basis functions angular momenta |
| 120 | + for each atom (not for element!) |
| 121 | + """ |
| 122 | + |
| 123 | + def read_array(f, n, dtype): |
| 124 | + return np.frombuffer(f.read(dtype().itemsize * n), dtype=dtype) |
| 125 | + |
| 126 | + def read_mos(): |
| 127 | + f.seek(24) |
| 128 | + offset = struct.unpack("<q", f.read(np.int64().itemsize))[0] # int64: pointer to orbitals |
| 129 | + f.seek(offset) |
| 130 | + sets = struct.unpack("<i", f.read(np.int32().itemsize))[0] # int32: number of MO sets |
| 131 | + nao = struct.unpack("<i", f.read(np.int32().itemsize))[0] # int32: number of orbitals |
| 132 | + assert sets in (1,2) |
| 133 | + |
| 134 | + coefficients_ab = [] |
| 135 | + energies_ab = [] |
| 136 | + occupations_ab = [] |
| 137 | + |
| 138 | + for i in range(sets): |
| 139 | + coefficients = read_array(f, nao*nao, np.float64).reshape(-1, nao) |
| 140 | + occupations = read_array(f, nao, np.float64) |
| 141 | + energies = read_array(f, nao, np.float64) |
| 142 | + irreps = read_array(f, nao, np.int32) |
| 143 | + cores = read_array(f, nao, np.int32) |
| 144 | + coefficients_ab.append(coefficients) |
| 145 | + energies_ab.append(energies) |
| 146 | + occupations_ab.append(occupations) |
| 147 | + |
| 148 | + coefficients_ab = np.array(coefficients_ab) |
| 149 | + energies_ab = np.array(energies_ab) |
| 150 | + occupations_ab = np.array(occupations_ab) |
| 151 | + return coefficients_ab, energies_ab, occupations_ab |
| 152 | + |
| 153 | + def read_basis(MAX_PRIMITIVES=37): |
| 154 | + f.seek(16) |
| 155 | + offset = struct.unpack("<q", f.read(np.int64().itemsize))[0] |
| 156 | + f.seek(offset) |
| 157 | + _, nat = struct.unpack("<2i", f.read(2 * np.int32().itemsize)) |
| 158 | + ls = {} |
| 159 | + for at in range(nat): |
| 160 | + ls[at] = [] |
| 161 | + _, nao_at = struct.unpack("<2i", f.read(2 * np.int32().itemsize)) |
| 162 | + for iao in range(nao_at): |
| 163 | + l, _, ngto, _ = struct.unpack("<4i", f.read(4 * np.int32().itemsize)) |
| 164 | + a = read_array(f, MAX_PRIMITIVES, np.float64) # exponents |
| 165 | + c = read_array(f, MAX_PRIMITIVES, np.float64) # coefficients |
| 166 | + ls[at].append(l) |
| 167 | + return ls |
| 168 | + |
| 169 | + with open(fname, "rb") as f: |
| 170 | + coefficients_ab, energies_ab, occupations_ab = read_mos() |
| 171 | + try: |
| 172 | + ls = read_basis() |
| 173 | + except: # Orca version 4.0 |
| 174 | + ls = {} |
| 175 | + return coefficients_ab, energies_ab, occupations_ab, ls |
| 176 | + |
| 177 | + |
| 178 | +def _get_indices(mol, ls_from_orca): |
| 179 | + """ Get coefficient needed to reorder the AO read from Orca. |
| 180 | +
|
| 181 | + Args: |
| 182 | + mol (pyscf Mole): pyscf Mole object. |
| 183 | + ls_from_orca : dict of {int : [int]} with a list of basis functions |
| 184 | + angular momenta for those atoms (not elements!) |
| 185 | + whose basis functions are *not* sorted wrt to angular momenta. |
| 186 | + The lists represent the Orca order. |
| 187 | +
|
| 188 | + Returns: |
| 189 | + numpy int 1darray of (nao,) containing the indices to be used as |
| 190 | + c_reordered = c_orca[indices] |
| 191 | + """ |
| 192 | + if ls_from_orca is None: |
| 193 | + return None |
| 194 | + ao_limits = mol.offset_ao_by_atom()[:,2:] |
| 195 | + indices_full = np.arange(mol.nao) |
| 196 | + for iat, ls in ls_from_orca.items(): |
| 197 | + indices = [] |
| 198 | + i = 0 |
| 199 | + for il, l in enumerate(ls): |
| 200 | + indices.append((l, il, i + np.arange(2*l+1))) |
| 201 | + i += 2*l+1 |
| 202 | + indices = sorted(indices, key=lambda x: (x[0], x[1])) |
| 203 | + indices = np.array([j for i in indices for j in i[2]]) |
| 204 | + atom_slice = np.s_[ao_limits[iat][0]:ao_limits[iat][1]] |
| 205 | + indices_full[atom_slice] = indices[:] + ao_limits[iat][0] |
| 206 | + assert np.all(sorted(indices_full)==np.arange(mol.nao)) |
| 207 | + return indices_full |
| 208 | + |
| 209 | + |
| 210 | +def reorder_coeff_inplace(c_full, mol, reorder_dest='pyscf', ls_from_orca=None): |
| 211 | + """ Reorder coefficient read from ORCA .gbw |
| 212 | +
|
| 213 | + Args: |
| 214 | + c_full : numpy 3darray of (s,nao,nao) containing the MO coefficients |
| 215 | + to reorder |
| 216 | + mol (pyscf Mole): pyscf Mole object. |
| 217 | + Kwargs: |
| 218 | + reorder_dest (str): Which AO ordering convention to use. |
| 219 | + ls_from_orca : dict of {int : [int]} with a list of basis functions |
| 220 | + angular momenta for those atoms (not elements!) |
| 221 | + whose basis functions are *not* sorted wrt to angular momenta. |
| 222 | + The lists represent the Orca order. |
| 223 | + """ |
| 224 | + def _reorder_coeff(c): |
| 225 | + # In ORCA, at least def2-SVP and def2-TZVP for 3d metals |
| 226 | + # are not sorted wrt angular momenta |
| 227 | + idx = _get_indices(mol, ls_from_orca) |
| 228 | + for i in range(len(c)): |
| 229 | + if idx is not None: |
| 230 | + c[:,i] = c[idx,i] |
| 231 | + c[:,i] = reorder_ao(mol, c[:,i], src='orca', dest=reorder_dest) |
| 232 | + for i in range(c_full.shape[0]): |
| 233 | + _reorder_coeff(c_full[i]) |
| 234 | + |
| 235 | + |
| 236 | +def read_gbw(mol, fname, reorder_dest='pyscf', sort_l=True): |
| 237 | + """Read orbitals from an ORCA output. |
| 238 | +
|
| 239 | + Tested on Orca versions 4.2 and 5.0. |
| 240 | + Limited for Orca version 4.0 (cannot read the basis set). |
| 241 | +
|
| 242 | + Args: |
| 243 | + mol (pyscf Mole): pyscf Mole object. |
| 244 | + fname (str): path to the gbw file. |
| 245 | + Kwargs: |
| 246 | + reorder_dest (str): Which AO ordering convention to use. |
| 247 | + sort_l (bool): if sort the basis functions wrt angular momenta. |
| 248 | + e.g. PySCF requires them sorted. |
| 249 | +
|
| 250 | + Returns: |
| 251 | + numpy 3darray of (s,nao,nao) containing the MO coefficients |
| 252 | + numpy 2darray of (s,nao) containing the MO energies |
| 253 | + numpy 2darray of (s,nao) containing the MO occupation numbers |
| 254 | + s is 1 for closed-shell and 2 for open-shell computation. |
| 255 | + nao is number of atomic/molecular orbitals. |
| 256 | + """ |
| 257 | + c, e, occ, ls = _parse_gbw(fname) |
| 258 | + if not ls and sort_l: |
| 259 | + raise RuntimeError(f'{fname}: basis set information not found. Cannot check if the basis set is sorted wrt angular momenta. Put sort_l=False to ignore') |
| 260 | + |
| 261 | + ls = {iat: lsiat for iat, lsiat in ls.items() if np.any(lsiat!=sorted(lsiat))} |
| 262 | + if ls and not sort_l: |
| 263 | + msg = f'\n{fname}: basis set is not sorted wrt angular momenta for atoms # {list(ls.keys())} and is kept as is' |
| 264 | + warnings.warn(msg) |
| 265 | + |
| 266 | + if reorder_dest is not None: |
| 267 | + reorder_coeff_inplace(c, mol, reorder_dest, ls if (ls and sort_l) else None) |
| 268 | + return c, e, occ |
| 269 | + |
0 commit comments