From 856445b8867372feaed9e22419cde97aa43e7a19 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Mon, 9 Mar 2026 11:52:31 +0100 Subject: [PATCH 1/6] add joblib parallelisation to basis opt --- qstack/basis_opt/opt.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index 6d2dcf7..9b02c27 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -9,7 +9,7 @@ import pyscf.data from ..compound import basis_flatten from . import basis_tools as qbbt - +import joblib def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in="CG", printlvl=2, check=False): """Optimize a given basis set. @@ -27,6 +27,9 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in= Dictionary containing the optimized basis. """ + + runner = joblib.Parallel(n_jobs=-1, return_as="generator_unordered") + def energy(x): """Compute total loss function (fitting error) for given exponents. @@ -38,9 +41,14 @@ def energy(x): """ exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) - E = 0.0 - for m in moldata: - E += qbbt.energy_mol(newbasis, m) + E = sum(runner( + joblib.delayed(qbbt.energy_mol)(newbasis, m) + for m in moldata + ), start=0.0) + if printlvl>=2: + print("energy complete:", E, flush=True) + elif printlvl>=1: + print(end="", flush=True) return E def gradient(x): @@ -59,18 +67,25 @@ def gradient(x): E = 0.0 dE_da = np.zeros(nexp) - for m in moldata: + #for m in moldata: + def minirun(m): E_, dE_da_ = qbbt.gradient_mol(nexp, newbasis, m) - E += E_ - dE_da += dE_da_ if printlvl>=2: - print('e =', E_, '(', E_/m['self']*100.0, '%)') + print('1-compound gradient: e =', E_, '(', E_/m['self']*100.0, '%)') + return E_, dE_da_ + runs = runner(delayed(minirun)(m) for m in moldata) + for E_, dE_da_ in runs: + E += E_ + dE_da += dE_da_ + if printlvl>=2: - print(E, max(abs(dE_da))) + print("gradient complete:", E, max(abs(dE_da))) dE_da = qbbt.cut_myelements(dE_da, myelements, bf_bounds) if printlvl>=2: print(flush=True) + elif printlvl>=1: + print(end='',flush=True) dE_dx = dE_da * exponents return E, dE_dx @@ -217,7 +232,7 @@ def make_moldata(fname): return {'num': gr_num, 'an': gr_an, 'diff': gr_an-gr_num} xopt = scipy.optimize.minimize(energy, x1, method=method_in, jac=gradient_only, - options={'gtol': gtol_in, 'disp': printlvl}).x + options={'gtol': gtol_in, 'disp': printlvl>0}).x exponents = np.exp(xopt) newbasis = qbbt.exp2basis(exponents, myelements, basis) From 8e12795c804201ea29ed6df54f7657e1b9cd5ef6 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Mon, 9 Mar 2026 14:58:50 +0100 Subject: [PATCH 2/6] feat: mild optimisation of basis_opt process --- qstack/basis_opt/basis_tools.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index 337afcd..5066dea 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -24,8 +24,9 @@ def energy_mol(newbasis, moldata): newmol = df.make_auxmol(mol, newbasis) ao = dft.numint.eval_ao(newmol, coords).T w = np.einsum('pi,i,i->p', ao, rho, weights) - S = np.einsum('pi,qi,i->pq', ao, ao, weights) - c = np.linalg.solve(S, w) + #S = np.einsum('pi,qi,i->pq', ao, ao, weights) + S = newmol.intor('int1e') + c = np.linalg.solve(S, w, assume_a='pos') E = self-c@w return E @@ -56,14 +57,15 @@ def gradient_mol(nexp, newbasis, moldata): newmol = df.make_auxmol(mol, newbasis) ao = dft.numint.eval_ao(newmol, coords).T - w = np.einsum('pi,i,i->p', ao, rho, weights) + w = np.einsum('i,i,pi->p', rho, weights, ao) dw_da = np.zeros((nexp, newmol.nao)) for p in range(newmol.nao): iat = centers[p] r2 = distances[iat] dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights) - S = np.einsum('pi,qi,i->pq', ao, ao, weights) + #S = np.einsum('pi,qi,i->pq', ao, ao, weights) + S = newmol.intor('int1e') dS_da = np.zeros((nexp, newmol.nao, newmol.nao)) for p in range(newmol.nao): @@ -83,7 +85,7 @@ def gradient_mol(nexp, newbasis, moldata): dS_da[ip, q, p] += ip_p_q dS_da[iq, q, p] += iq_p_q - c = np.linalg.solve(S, w) + c = np.linalg.solve(S, w, assume_a='pos') part1 = np.einsum('p,ip->i', c, dw_da) part2 = np.einsum('p,ipq,q->i', c, dS_da, c) dE_da = 2.0*part1 - part2 From e81aa5f61874d91e9c897c4b8342ac16a2261ce5 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Mon, 9 Mar 2026 17:54:58 +0100 Subject: [PATCH 3/6] feat: rewrite gradient eval for basis_opt --- qstack/basis_opt/basis_tools.py | 200 ++++++++++++++++++++++++++------ qstack/basis_opt/opt.py | 13 ++- 2 files changed, 172 insertions(+), 41 deletions(-) diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index 5066dea..c703fd1 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -2,6 +2,7 @@ import copy import numpy as np +import scipy.linalg as spl from pyscf import df, dft @@ -23,15 +24,85 @@ def energy_mol(newbasis, moldata): newmol = df.make_auxmol(mol, newbasis) ao = dft.numint.eval_ao(newmol, coords).T - w = np.einsum('pi,i,i->p', ao, rho, weights) - #S = np.einsum('pi,qi,i->pq', ao, ao, weights) - S = newmol.intor('int1e') - c = np.linalg.solve(S, w, assume_a='pos') + w = np.einsum('i,i,pi->p', rho, weights, ao) + #S = np.einsum('i,pi,qi->pq', weights, ao, ao) + #S = (ao*weights)@ao.T + S = newmol.intor('int1e_ovlp') + c = spl.solve(S, w, assume_a='pos') E = self-c@w return E +# import time +# def gradient_mol(nexp, newbasis, moldata): +# """Compute loss function and gradient for one molecule with respect to basis exponents. + +# Args: +# nexp (int): Number of exponents. +# newbasis (dict): Basis set. +# moldata (dict): Dictionary containing molecular data. + +# Returns: +# tuple: A tuple containing: +# - E (float): Loss function value. +# - dE_da (numpy.ndarray): Gradient of loss function with respect to exponents. +# """ +# start = time.perf_counter() + +# mol = moldata['mol' ] +# rho = moldata['rho' ] +# coords = moldata['coords' ] +# weights = moldata['weights' ] +# self = moldata['self' ] +# idx = moldata['idx' ] +# centers = moldata['centers' ] +# distances = moldata['distances'] + +# newmol = df.make_auxmol(mol, newbasis) +# ao = dft.numint.eval_ao(newmol, coords).T + +# print(f'base eval done: {time.perf_counter()-start:f} s') +# w = np.einsum('i,i,pi->p', rho, weights, ao) +# dw_da = np.zeros((nexp, newmol.nao)) +# for p in range(newmol.nao): +# iat = centers[p] +# r2 = distances[iat] +# dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights) +# print(f'dw_da done: {time.perf_counter()-start:f} s') + +# #S = np.einsum('pi,qi,i->pq', ao, ao, weights) +# S = newmol.intor('int1e_ovlp') +# dS_da = np.zeros((nexp, newmol.nao, newmol.nao)) + +# print(idx) +# for p in range(newmol.nao): +# for q in range(p, newmol.nao): +# ip = idx[p] +# iq = idx[q] +# iatp = centers[p] +# iatq = centers[q] +# r2p = distances[iatp] +# r2q = distances[iatq] +# ao_ao_w = np.einsum('i,i,i->i', ao[p], ao[q], weights) +# ip_p_q = np.einsum('i,i->', ao_ao_w, r2p) +# iq_p_q = np.einsum('i,i->', ao_ao_w, r2q) +# dS_da[ip, p, q] += ip_p_q +# dS_da[iq, p, q] += iq_p_q +# if p != q: +# dS_da[ip, q, p] += ip_p_q +# dS_da[iq, q, p] += iq_p_q +# print(f'dS_da done: {time.perf_counter()-start:f} s') + +# c = spl.solve(S, w, assume_a='pos') +# part1 = np.einsum('p,ip->i', c, dw_da) +# part2 = np.einsum('p,ipq,q->i', c, dS_da, c) +# dE_da = 2.0*part1 - part2 +# E = self - c@w +# print(f'rest done: {time.perf_counter()-start:f} s') +# return E, dE_da + +import time def gradient_mol(nexp, newbasis, moldata): """Compute loss function and gradient for one molecule with respect to basis exponents. @@ -45,6 +116,8 @@ def gradient_mol(nexp, newbasis, moldata): - E (float): Loss function value. - dE_da (numpy.ndarray): Gradient of loss function with respect to exponents. """ + #start = time.perf_counter() + mol = moldata['mol' ] rho = moldata['rho' ] coords = moldata['coords' ] @@ -56,43 +129,98 @@ def gradient_mol(nexp, newbasis, moldata): newmol = df.make_auxmol(mol, newbasis) ao = dft.numint.eval_ao(newmol, coords).T - + #S = np.einsum('i,pi,qi->pq', weights, ao, ao) + #S = (ao*weights)@ao.T + S = newmol.intor('int1e_ovlp') w = np.einsum('i,i,pi->p', rho, weights, ao) - dw_da = np.zeros((nexp, newmol.nao)) - for p in range(newmol.nao): - iat = centers[p] - r2 = distances[iat] - dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights) - - #S = np.einsum('pi,qi,i->pq', ao, ao, weights) - S = newmol.intor('int1e') - dS_da = np.zeros((nexp, newmol.nao, newmol.nao)) - - for p in range(newmol.nao): - for q in range(p, newmol.nao): - ip = idx[p] - iq = idx[q] - iatp = centers[p] - iatq = centers[q] - r2p = distances[iatp] - r2q = distances[iatq] - ao_ao_w = np.einsum('i,i,i->i', ao[p], ao[q], weights) - ip_p_q = np.einsum('i,i->', ao_ao_w, r2p) - iq_p_q = np.einsum('i,i->', ao_ao_w, r2q) - dS_da[ip, p, q] += ip_p_q - dS_da[iq, p, q] += iq_p_q - if p != q: - dS_da[ip, q, p] += ip_p_q - dS_da[iq, q, p] += iq_p_q - - c = np.linalg.solve(S, w, assume_a='pos') - part1 = np.einsum('p,ip->i', c, dw_da) - part2 = np.einsum('p,ipq,q->i', c, dS_da, c) - dE_da = 2.0*part1 - part2 + c = spl.solve(S, w, assume_a='pos') E = self - c@w + del S, w + #print(f'energy done: {time.perf_counter()-start:f} s') + + + # ## the next block can also be expressed as: + # ao_mapper = np.zeros(newmol.nao, nexp) + # ao_mapper[ np.arange(newmol.nao), idx] = 1 + # dE_da = np.einsum('i,i,pi,pi,p,pc->c' + # rho, weights, + # distances[centers], ao, + # 2*c, + # ao_mapper + # ) + aoc_temp = ao * c[:, None] + aoc_per_e = np.zeros((nexp, rho.shape[0])) + for p,aoc in enumerate(aoc_temp): + aoc *= distances[centers[p]] + aoc_per_e[idx[p]] += aoc + dE_da = 2* (aoc_per_e @ (rho*weights)) + del aoc_temp, aoc_per_e + #print(f'dw part done: {time.perf_counter()-start:f} s') + + # ## check with original impl. + # dw_da = np.zeros((nexp, newmol.nao)) + # for p in range(newmol.nao): + # iat = centers[p] + # r2 = distances[iat] + # dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights) + # assert np.allclose(dE_da, 2*dw_da@c) + + + # ## the next block can also be expressed as: + # temp = np.einsum('i,pi,qi,p,q,pi->pq', + # weights, ao, ao, c, c, + # distances[centers], + # ) + # temp += temp.T + # temp.sum(axis=0) + # dE_da -= temp @ ao_mapper # dE_da -= c@dS_da@c + # ## alternatively + # dE_da -= np.einsum('i,pi,qi,p,q,pi,pc->c', + # weights, ao, ao, c, c, + # distances[centers], + # 2*ao_mapper, + # ) + + aoc = c[:, None] * ao + wao = aoc.sum(axis=0) + wao *= weights + + mapped = np.zeros((nexp, rho.shape[0])) + for p,aoc_slice in enumerate(aoc): + mapped[idx[p]] += aoc_slice * distances[centers[p]] + mapped *= wao + + # ## check with original impl. + # dS_da = np.zeros((nexp, newmol.nao, newmol.nao)) + # for p in range(newmol.nao): + # for q in range(p, newmol.nao): + # ip = idx[p] + # iq = idx[q] + # iatp = centers[p] + # iatq = centers[q] + # r2p = distances[iatp] + # r2q = distances[iatq] + # ao_ao_w = np.einsum('i,i,i->i', ao[p], ao[q], weights) + # ip_p_q = np.einsum('i,i->', ao_ao_w, r2p) + # iq_p_q = np.einsum('i,i->', ao_ao_w, r2q) + # dS_da[ip, p, q] += ip_p_q + # dS_da[iq, p, q] += iq_p_q + # if p != q: + # dS_da[ip, q, p] += ip_p_q + # dS_da[iq, q, p] += iq_p_q + # comparison = (dS_da@c)@c + # assert np.allclose(2*mapped.sum(axis=1), comparison) + + + + dE_da -= 2*mapped.sum(axis=1) + del mapped, wao, aoc + + #print(f'dS part done: {time.perf_counter()-start:f} s') return E, dE_da + def exp2basis(exponents, elements, basis): """Convert exponents array to basis set format. diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index 9b02c27..fa06242 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -1,6 +1,6 @@ """Basis set optimization routines and command-line interface.""" -import sys +import sys, time from ast import literal_eval import argparse import numpy as np @@ -39,6 +39,7 @@ def energy(x): Returns: float: Loss function value. """ + start = time.perf_counter() exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) E = sum(runner( @@ -46,7 +47,7 @@ def energy(x): for m in moldata ), start=0.0) if printlvl>=2: - print("energy complete:", E, flush=True) + print(f"energy complete: {E:g} (took {time.perf_counter()-start:f} s)", flush=True) elif printlvl>=1: print(end="", flush=True) return E @@ -62,6 +63,7 @@ def gradient(x): - E (float): Loss function value. - dE_dx (numpy.ndarray): Gradient with respect to log(exponents). """ + gradstart = time.perf_counter() exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) @@ -69,17 +71,18 @@ def gradient(x): dE_da = np.zeros(nexp) #for m in moldata: def minirun(m): + start = time.perf_counter() E_, dE_da_ = qbbt.gradient_mol(nexp, newbasis, m) if printlvl>=2: - print('1-compound gradient: e =', E_, '(', E_/m['self']*100.0, '%)') + print(f"1-compound {(m['mol'].nao, m['rho'].shape)!r} gradient: err={E_:g} (rel: {E_/m['self']:g}) (took {time.perf_counter()-start:f} s)", flush=True) return E_, dE_da_ - runs = runner(delayed(minirun)(m) for m in moldata) + runs = runner(joblib.delayed(minirun)(m) for m in moldata) for E_, dE_da_ in runs: E += E_ dE_da += dE_da_ if printlvl>=2: - print("gradient complete:", E, max(abs(dE_da))) + print(f"gradient complete: {E:g} (took {time.perf_counter()-gradstart:f}s)", E, max(abs(dE_da))) dE_da = qbbt.cut_myelements(dE_da, myelements, bf_bounds) if printlvl>=2: From 80468fb4e670d16c74271cd2e48e783cc8e779da Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Mon, 23 Mar 2026 11:27:22 +0100 Subject: [PATCH 4/6] fix callability of basis optimisation, and added nwchem basis set output --- qstack/basis_opt/__main__.py | 54 +++++++++++++++++++++++++++++++++ qstack/basis_opt/basis_tools.py | 51 +++++++++++++++++++++++++++++-- qstack/basis_opt/opt.py | 8 +++-- 3 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 qstack/basis_opt/__main__.py diff --git a/qstack/basis_opt/__main__.py b/qstack/basis_opt/__main__.py new file mode 100644 index 0000000..d46357b --- /dev/null +++ b/qstack/basis_opt/__main__.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +""" +The command-line launcher for the basis set optimisation function of qstack.basis_opt.opt + +(can be called as `python3 -m qstack.basis_opt`, and has a cleaner import chain than `python3 -m qstack.basis_opt.opt`) +""" + +import sys, argparse +from . import basis_tools as qbbt +from .opt import optimize_basis + + +def _get_arg_parser(): + """Parse CLI arguments.""" + parser = argparse.ArgumentParser(description='Optimize a density fitting basis set.') + parser.add_argument('--elements', type=str, dest='elements', nargs='+', help='elements for optimization') + parser.add_argument('--basis', type=str, dest='basis', nargs='+', help='initial df bases', required=True) + parser.add_argument('--molecules', type=str, dest='molecules', nargs='+', help='molecules', required=True) + parser.add_argument('--gtol', type=float, dest='gtol', default=1e-7, help='tolerance') + parser.add_argument('--method', type=str, dest='method', default='CG', help='minimization algoritm') + parser.add_argument('--print', type=int, dest='print', default=2, help='printing level') + parser.add_argument('--check', action='store_true', dest='check', default=False, help='check the gradient and exit') + parser.add_argument('--output', type=str, dest='output', default=None, help='optional file to write the optimised basis set to (nwchem format, which pyscf can read)') + return parser + + +def main(): + """Run basis set optimization via command-line interface.""" + args = _get_arg_parser().parse_args() + + result = optimize_basis(args.elements, args.basis, args.molecules, args.gtol, args.method, check=args.check, printlvl=args.print) + if args.check is False: + if args.print==0: + qbbt.printbasis(result, sys.stdout) + if args.output is not None: + with open(args.output, 'w') as f: + qbbt.basis_as_nwchem(f, result, '[placeholder name for a qstack-optimised basis set]') + else: + gr_an, gr_num, gr_diff = result['an'], result['num'], result['diff'] + print('analytical gradient') + print(gr_an) + print('numerical gradient') + print(gr_num) + print('difference') + print(gr_diff) + print('relative difference') + print(gr_diff/gr_num) + print(flush=True) + + +if __name__ == "__main__": + main() + diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index c703fd1..df376b9 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -4,6 +4,7 @@ import numpy as np import scipy.linalg as spl from pyscf import df, dft +import pyscf def energy_mol(newbasis, moldata): @@ -268,11 +269,57 @@ def printbasis(basis, f): f (file): File object to write to. """ print('{', file=f) - for q, b in basis.items(): + for i, (q, b) in enumerate(basis.items()): print(' "'+q+'": [', file=f) for i, gto in enumerate(b): if i > 0: print(',', file=f) print(' ', gto, file=f, end='') - print(' ]', file=f) + if i==len(basis)-1: + print('\n ]', file=f) + else: + print('\n ],', file=f) print('}', file=f) + + +def basis_as_nwchem(f_out, basis_dict, name='custom basis set'): + """Print basis set in NWchem format (the one pyscf reads from files) + + Prints a basis dictionnary into a provided file. The name of the basis, as indicated in the file itself, can be provided. + + Args: + f_out (writable io.TextIOBase): the file to write the basis to + basis_dict (pyscf-format basis dictionnary): the basis to write + name (str): the optional name of the basis + """ + + sorted_atom_types = sorted(basis_dict.keys(), key=pyscf.data.elements.charge) + angular_names = 'spdfgh' + + f_out.write(f"# {name:s}\n# (custom basis written by qstack)\n# supported elements: {', '.join(sorted_atom_types):s}\n\n") + f_out.write("BASIS \"ao basis\" PRINT") + for atom in sorted_atom_types: + fakemol = pyscf.M(atom=atom, charge=pyscf.data.elements.charge(atom)) + nbas = fakemol.nbas + l_count = max(fakemol.bas_angular(x) for x in range(nbas))+1 + + prim_count=[0]*l_count + shell_count=[0]*l_count + for bas_i in range(nbas): + l = fakemol.bas_angular(bas_i) + prim_count[l] += fakemol.bas_nprim(bas_i) + shell_count[l] += fakemol.bas_nctr(bas_i) + prim_mark = [f"{n:d}{angular_names[l]:s}" for l,n in enumerate(prim_count) ] + shell_mark = [f"{n:d}{angular_names[l]:s}" for l,n in enumerate(shell_count) ] + + f_out.write(f'#BASIS SET: ({",".join(prim_mark):s}) -> [{",".join(shell_mark):s}]\n') + + for bas_i in range(nbas): + l = fakemol.bas_angular(bas_i) + f_out.write(f"{atom:<2s} {angular_names[l].upper():s}\n") + exps = fakemol.bas_exp(bas_i) + coeffs = fakemol.bas_ctr_coeff(bas_i) + for exp, exp_coeff in zip(exps,coeffs): + fmt = "{:>15.7G}" + " {:>15.7G}"*len(exp_coeff) + "\n" + f_out.write(fmt.format(exp, *exp_coeff).replace('E','D')) + f_out.write('END\n') diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index fa06242..c82d4e7 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -42,10 +42,12 @@ def energy(x): start = time.perf_counter() exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) - E = sum(runner( + E = 0.0 + for value in runner( joblib.delayed(qbbt.energy_mol)(newbasis, m) for m in moldata - ), start=0.0) + ): + E += value if printlvl>=2: print(f"energy complete: {E:g} (took {time.perf_counter()-start:f} s)", flush=True) elif printlvl>=1: @@ -73,7 +75,7 @@ def gradient(x): def minirun(m): start = time.perf_counter() E_, dE_da_ = qbbt.gradient_mol(nexp, newbasis, m) - if printlvl>=2: + if printlvl>=3: print(f"1-compound {(m['mol'].nao, m['rho'].shape)!r} gradient: err={E_:g} (rel: {E_/m['self']:g}) (took {time.perf_counter()-start:f} s)", flush=True) return E_, dE_da_ runs = runner(joblib.delayed(minirun)(m) for m in moldata) From 64bd60e3096c924257ec735f86010509986a5321 Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Mon, 23 Mar 2026 11:44:40 +0100 Subject: [PATCH 5/6] fix regular printing and test --- qstack/basis_opt/basis_tools.py | 4 ++-- qstack/basis_opt/opt.py | 28 ++++++++++++++++++++-------- tests/test_opt.py | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index df376b9..483404c 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -269,13 +269,13 @@ def printbasis(basis, f): f (file): File object to write to. """ print('{', file=f) - for i, (q, b) in enumerate(basis.items()): + for atom_i, (q, b) in enumerate(basis.items()): print(' "'+q+'": [', file=f) for i, gto in enumerate(b): if i > 0: print(',', file=f) print(' ', gto, file=f, end='') - if i==len(basis)-1: + if atom_i==len(basis)-1: print('\n ]', file=f) else: print('\n ],', file=f) diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index c82d4e7..d7fdaa3 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -9,7 +9,10 @@ import pyscf.data from ..compound import basis_flatten from . import basis_tools as qbbt -import joblib +try: + import joblib +except ImportError: + joblib = None def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in="CG", printlvl=2, check=False): """Optimize a given basis set. @@ -28,7 +31,8 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in= """ - runner = joblib.Parallel(n_jobs=-1, return_as="generator_unordered") + if joblib is not None: + runner = joblib.Parallel(n_jobs=-1, return_as="generator_unordered") def energy(x): """Compute total loss function (fitting error) for given exponents. @@ -43,11 +47,15 @@ def energy(x): exponents = np.exp(x) newbasis = qbbt.exp2basis(exponents, myelements, basis) E = 0.0 - for value in runner( - joblib.delayed(qbbt.energy_mol)(newbasis, m) - for m in moldata - ): - E += value + if joblib is not None: + for value in runner( + joblib.delayed(qbbt.energy_mol)(newbasis, m) + for m in moldata + ): + E += value + else: + for m in moldata: + E += qbbt.energy_mol(newbasis,m) if printlvl>=2: print(f"energy complete: {E:g} (took {time.perf_counter()-start:f} s)", flush=True) elif printlvl>=1: @@ -78,7 +86,11 @@ def minirun(m): if printlvl>=3: print(f"1-compound {(m['mol'].nao, m['rho'].shape)!r} gradient: err={E_:g} (rel: {E_/m['self']:g}) (took {time.perf_counter()-start:f} s)", flush=True) return E_, dE_da_ - runs = runner(joblib.delayed(minirun)(m) for m in moldata) + + if joblib is not None: + runs = runner(joblib.delayed(minirun)(m) for m in moldata) + else: + runs = (minirun(m) for m in moldata) for E_, dE_da_ in runs: E += E_ dE_da += dE_da_ diff --git a/tests/test_opt.py b/tests/test_opt.py index f5f216b..10fb46c 100755 --- a/tests/test_opt.py +++ b/tests/test_opt.py @@ -16,7 +16,38 @@ def test_hf_otpd(): g = basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt', path+'/data/initial/O_N0.txt'], [mol_dict], check=True, printlvl=0) assert (np.all(g['diff'] < 1e-6)) - ob_good = {'H': [[0, [42.30256758622713, 1]], [0, [6.83662718701579, 1]], [0, [1.8547192742478775, 1]], [0, [0.3797283290452742, 1]], [1, [12.961663119622536, 1]], [1, [2.507400755551906, 1]], [1, [0.6648804678758861, 1]], [2, [3.482167705165484, 1]], [2, [0.6053728887614225, 1]], [3, [0.6284190712545101, 1]]]} + # # with the grid-approximated self-overlap + # ob_good = { + # 'H': [ + # [0, [42.30256758622713, 1]], + # [0, [6.83662718701579, 1]], + # [0, [1.8547192742478775, 1]], + # [0, [0.3797283290452742, 1]], + # [1, [12.961663119622536, 1]], + # [1, [2.507400755551906, 1]], + # [1, [0.6648804678758861, 1]], + # [2, [3.482167705165484, 1]], + # [2, [0.6053728887614225, 1]], + # [3, [0.6284190712545101, 1]] + # ], + # } + + # # with the true self-overlap + ob_good = { + "H": [ + [0, [42.30282217079675, 1]], + [0, [6.836275058516526, 1]], + [0, [1.8548659895984756, 1]], + [0, [0.37973118342659834, 1]], + [1, [12.961642133527626, 1]], + [1, [2.5073719856017003, 1]], + [1, [0.664871596194868, 1]], + [2, [3.4821895465370454, 1]], + [2, [0.6053775933231458, 1]], + [3, [0.6284190862766469, 1]] + ], + } + ob = basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt'], [path+'/data/H2.ccpvtz.grid3.npz'], printlvl=2, gtol_in=1e-5) for [_l,[a,_c]], [_l1,[a1,_c1]] in zip(ob_good['H'], ob['H'], strict=True): assert (abs(a-a1)<1e-5) From 8084acd3f725e4cbd01f77ddae6a445bb1831ddd Mon Sep 17 00:00:00 2001 From: Liam Marsh Date: Mon, 23 Mar 2026 11:59:36 +0100 Subject: [PATCH 6/6] chore: make ruff happy --- docs/generate_rst.py | 5 ++--- qstack/basis_opt/__main__.py | 6 +++--- qstack/basis_opt/basis_tools.py | 14 ++++++-------- qstack/basis_opt/opt.py | 4 ++-- tests/test_opt.py | 2 +- 5 files changed, 14 insertions(+), 17 deletions(-) mode change 100644 => 100755 qstack/basis_opt/__main__.py diff --git a/docs/generate_rst.py b/docs/generate_rst.py index ce15334..ed9b8ba 100755 --- a/docs/generate_rst.py +++ b/docs/generate_rst.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -""" -Generate Sphinx-ready .rst files +"""Generate Sphinx-ready .rst files Used instead of Sphinx to avoid importing broken or expensive dependecies @@ -147,7 +146,7 @@ def extract_module_info(py_path: Path, module_name: str) -> ModuleInfo: try: tree = ast.parse(src) except SyntaxError: - return ModuleInfo(module_name, py_path, None, [], [], None, [],) + return ModuleInfo(module_name, py_path, None, [], [], None, []) mdoc = ast.get_docstring(tree) classes: list[ClassInfo] = [] diff --git a/qstack/basis_opt/__main__.py b/qstack/basis_opt/__main__.py old mode 100644 new mode 100755 index d46357b..5a2df8e --- a/qstack/basis_opt/__main__.py +++ b/qstack/basis_opt/__main__.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 -""" -The command-line launcher for the basis set optimisation function of qstack.basis_opt.opt +"""The command-line launcher for the basis set optimisation function of `qstack.basis_opt.opt`. (can be called as `python3 -m qstack.basis_opt`, and has a cleaner import chain than `python3 -m qstack.basis_opt.opt`) """ -import sys, argparse +import sys +import argparse from . import basis_tools as qbbt from .opt import optimize_basis diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py index 483404c..96cd909 100644 --- a/qstack/basis_opt/basis_tools.py +++ b/qstack/basis_opt/basis_tools.py @@ -103,7 +103,6 @@ def energy_mol(newbasis, moldata): # print(f'rest done: {time.perf_counter()-start:f} s') # return E, dE_da -import time def gradient_mol(nexp, newbasis, moldata): """Compute loss function and gradient for one molecule with respect to basis exponents. @@ -165,8 +164,8 @@ def gradient_mol(nexp, newbasis, moldata): # r2 = distances[iat] # dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights) # assert np.allclose(dE_da, 2*dw_da@c) - - + + # ## the next block can also be expressed as: # temp = np.einsum('i,pi,qi,p,q,pi->pq', # weights, ao, ao, c, c, @@ -213,10 +212,10 @@ def gradient_mol(nexp, newbasis, moldata): # assert np.allclose(2*mapped.sum(axis=1), comparison) - + dE_da -= 2*mapped.sum(axis=1) del mapped, wao, aoc - + #print(f'dS part done: {time.perf_counter()-start:f} s') return E, dE_da @@ -283,7 +282,7 @@ def printbasis(basis, f): def basis_as_nwchem(f_out, basis_dict, name='custom basis set'): - """Print basis set in NWchem format (the one pyscf reads from files) + """Print basis set in NWchem format (the one pyscf reads from files). Prints a basis dictionnary into a provided file. The name of the basis, as indicated in the file itself, can be provided. @@ -292,7 +291,6 @@ def basis_as_nwchem(f_out, basis_dict, name='custom basis set'): basis_dict (pyscf-format basis dictionnary): the basis to write name (str): the optional name of the basis """ - sorted_atom_types = sorted(basis_dict.keys(), key=pyscf.data.elements.charge) angular_names = 'spdfgh' @@ -319,7 +317,7 @@ def basis_as_nwchem(f_out, basis_dict, name='custom basis set'): f_out.write(f"{atom:<2s} {angular_names[l].upper():s}\n") exps = fakemol.bas_exp(bas_i) coeffs = fakemol.bas_ctr_coeff(bas_i) - for exp, exp_coeff in zip(exps,coeffs): + for exp, exp_coeff in zip(exps, coeffs, strict=True): fmt = "{:>15.7G}" + " {:>15.7G}"*len(exp_coeff) + "\n" f_out.write(fmt.format(exp, *exp_coeff).replace('E','D')) f_out.write('END\n') diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py index d7fdaa3..aa065d0 100644 --- a/qstack/basis_opt/opt.py +++ b/qstack/basis_opt/opt.py @@ -1,6 +1,7 @@ """Basis set optimization routines and command-line interface.""" -import sys, time +import sys +import time from ast import literal_eval import argparse import numpy as np @@ -30,7 +31,6 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in= Dictionary containing the optimized basis. """ - if joblib is not None: runner = joblib.Parallel(n_jobs=-1, return_as="generator_unordered") diff --git a/tests/test_opt.py b/tests/test_opt.py index 10fb46c..2fb4a1b 100755 --- a/tests/test_opt.py +++ b/tests/test_opt.py @@ -44,7 +44,7 @@ def test_hf_otpd(): [1, [0.664871596194868, 1]], [2, [3.4821895465370454, 1]], [2, [0.6053775933231458, 1]], - [3, [0.6284190862766469, 1]] + [3, [0.6284190862766469, 1]], ], }