From 856445b8867372feaed9e22419cde97aa43e7a19 Mon Sep 17 00:00:00 2001
From: Liam Marsh <liam.marsh@epfl.ch>
Date: Mon, 9 Mar 2026 11:52:31 +0100
Subject: [PATCH 1/6] add joblib parallelisation to basis opt

---
 qstack/basis_opt/opt.py | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py
index 6d2dcf7..9b02c27 100644
--- a/qstack/basis_opt/opt.py
+++ b/qstack/basis_opt/opt.py
@@ -9,7 +9,7 @@
 import pyscf.data
 from ..compound import basis_flatten
 from . import basis_tools as qbbt
-
+import joblib
 
 def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in="CG", printlvl=2, check=False):
     """Optimize a given basis set.
@@ -27,6 +27,9 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in=
         Dictionary containing the optimized basis.
 
     """
+
+    runner = joblib.Parallel(n_jobs=-1, return_as="generator_unordered")
+
     def energy(x):
         """Compute total loss function (fitting error) for given exponents.
 
@@ -38,9 +41,14 @@ def energy(x):
         """
         exponents = np.exp(x)
         newbasis = qbbt.exp2basis(exponents, myelements, basis)
-        E = 0.0
-        for m in moldata:
-            E += qbbt.energy_mol(newbasis, m)
+        E = sum(runner(
+            joblib.delayed(qbbt.energy_mol)(newbasis, m)
+            for m in moldata
+        ), start=0.0)
+        if printlvl>=2:
+            print("energy complete:", E, flush=True)
+        elif printlvl>=1:
+            print(end="", flush=True)
         return E
 
     def gradient(x):
@@ -59,18 +67,25 @@ def gradient(x):
 
         E = 0.0
         dE_da = np.zeros(nexp)
-        for m in moldata:
+        #for m in moldata:
+        def minirun(m):
             E_, dE_da_ = qbbt.gradient_mol(nexp, newbasis, m)
-            E     += E_
-            dE_da += dE_da_
             if printlvl>=2:
-                print('e =', E_, '(', E_/m['self']*100.0, '%)')
+                print('1-compound gradient: e =', E_, '(', E_/m['self']*100.0, '%)')
+            return E_, dE_da_
+        runs = runner(delayed(minirun)(m) for m in moldata)
+        for E_, dE_da_ in runs:
+            E += E_
+            dE_da += dE_da_
+
         if printlvl>=2:
-            print(E, max(abs(dE_da)))
+            print("gradient complete:", E, max(abs(dE_da)))
         dE_da = qbbt.cut_myelements(dE_da, myelements, bf_bounds)
 
         if printlvl>=2:
             print(flush=True)
+        elif printlvl>=1:
+            print(end='',flush=True)
 
         dE_dx = dE_da * exponents
         return E, dE_dx
@@ -217,7 +232,7 @@ def make_moldata(fname):
         return {'num': gr_num, 'an': gr_an, 'diff': gr_an-gr_num}
 
     xopt = scipy.optimize.minimize(energy, x1, method=method_in, jac=gradient_only,
-                                   options={'gtol': gtol_in, 'disp': printlvl}).x
+                                   options={'gtol': gtol_in, 'disp': printlvl>0}).x
 
     exponents = np.exp(xopt)
     newbasis = qbbt.exp2basis(exponents, myelements, basis)

From 8e12795c804201ea29ed6df54f7657e1b9cd5ef6 Mon Sep 17 00:00:00 2001
From: Liam Marsh <liam.marsh@epfl.ch>
Date: Mon, 9 Mar 2026 14:58:50 +0100
Subject: [PATCH 2/6] feat: mild optimisation of basis_opt process

---
 qstack/basis_opt/basis_tools.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py
index 337afcd..5066dea 100644
--- a/qstack/basis_opt/basis_tools.py
+++ b/qstack/basis_opt/basis_tools.py
@@ -24,8 +24,9 @@ def energy_mol(newbasis, moldata):
     newmol = df.make_auxmol(mol, newbasis)
     ao = dft.numint.eval_ao(newmol, coords).T
     w = np.einsum('pi,i,i->p', ao, rho, weights)
-    S = np.einsum('pi,qi,i->pq', ao, ao, weights)
-    c = np.linalg.solve(S, w)
+    #S = np.einsum('pi,qi,i->pq', ao, ao, weights)
+    S = newmol.intor('int1e')
+    c = np.linalg.solve(S, w, assume_a='pos')
     E = self-c@w
 
     return E
@@ -56,14 +57,15 @@ def gradient_mol(nexp, newbasis, moldata):
     newmol = df.make_auxmol(mol, newbasis)
     ao = dft.numint.eval_ao(newmol, coords).T
 
-    w = np.einsum('pi,i,i->p', ao, rho, weights)
+    w = np.einsum('i,i,pi->p', rho, weights, ao)
     dw_da = np.zeros((nexp, newmol.nao))
     for p in range(newmol.nao):
         iat = centers[p]
         r2 = distances[iat]
         dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights)
 
-    S = np.einsum('pi,qi,i->pq', ao, ao, weights)
+    #S = np.einsum('pi,qi,i->pq', ao, ao, weights)
+    S = newmol.intor('int1e')
     dS_da = np.zeros((nexp, newmol.nao, newmol.nao))
 
     for p in range(newmol.nao):
@@ -83,7 +85,7 @@ def gradient_mol(nexp, newbasis, moldata):
                 dS_da[ip, q, p] += ip_p_q
                 dS_da[iq, q, p] += iq_p_q
 
-    c = np.linalg.solve(S, w)
+    c = np.linalg.solve(S, w, assume_a='pos')
     part1 = np.einsum('p,ip->i',  c, dw_da)
     part2 = np.einsum('p,ipq,q->i',  c, dS_da, c)
     dE_da = 2.0*part1 - part2

From e81aa5f61874d91e9c897c4b8342ac16a2261ce5 Mon Sep 17 00:00:00 2001
From: Liam Marsh <liam.marsh@epfl.ch>
Date: Mon, 9 Mar 2026 17:54:58 +0100
Subject: [PATCH 3/6] feat: rewrite gradient eval for basis_opt

---
 qstack/basis_opt/basis_tools.py | 200 ++++++++++++++++++++++++++------
 qstack/basis_opt/opt.py         |  13 ++-
 2 files changed, 172 insertions(+), 41 deletions(-)

diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py
index 5066dea..c703fd1 100644
--- a/qstack/basis_opt/basis_tools.py
+++ b/qstack/basis_opt/basis_tools.py
@@ -2,6 +2,7 @@
 
 import copy
 import numpy as np
+import scipy.linalg as spl
 from pyscf import df, dft
 
 
@@ -23,15 +24,85 @@ def energy_mol(newbasis, moldata):
 
     newmol = df.make_auxmol(mol, newbasis)
     ao = dft.numint.eval_ao(newmol, coords).T
-    w = np.einsum('pi,i,i->p', ao, rho, weights)
-    #S = np.einsum('pi,qi,i->pq', ao, ao, weights)
-    S = newmol.intor('int1e')
-    c = np.linalg.solve(S, w, assume_a='pos')
+    w = np.einsum('i,i,pi->p', rho, weights, ao)
+    #S = np.einsum('i,pi,qi->pq', weights, ao, ao)
+    #S = (ao*weights)@ao.T
+    S = newmol.intor('int1e_ovlp')
+    c = spl.solve(S, w, assume_a='pos')
     E = self-c@w
 
     return E
 
 
+# import time
+# def gradient_mol(nexp, newbasis, moldata):
+#     """Compute loss function and gradient for one molecule with respect to basis exponents.
+
+#     Args:
+#         nexp (int): Number of exponents.
+#         newbasis (dict): Basis set.
+#         moldata (dict): Dictionary containing molecular data.
+
+#     Returns:
+#         tuple: A tuple containing:
+#         - E (float): Loss function value.
+#         - dE_da (numpy.ndarray): Gradient of loss function with respect to exponents.
+#     """
+#     start = time.perf_counter()
+
+#     mol       = moldata['mol'      ]
+#     rho       = moldata['rho'      ]
+#     coords    = moldata['coords'   ]
+#     weights   = moldata['weights'  ]
+#     self      = moldata['self'     ]
+#     idx       = moldata['idx'      ]
+#     centers   = moldata['centers'  ]
+#     distances = moldata['distances']
+
+#     newmol = df.make_auxmol(mol, newbasis)
+#     ao = dft.numint.eval_ao(newmol, coords).T
+
+#     print(f'base eval done: {time.perf_counter()-start:f} s')
+#     w = np.einsum('i,i,pi->p', rho, weights, ao)
+#     dw_da = np.zeros((nexp, newmol.nao))
+#     for p in range(newmol.nao):
+#         iat = centers[p]
+#         r2 = distances[iat]
+#         dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights)
+#     print(f'dw_da done: {time.perf_counter()-start:f} s')
+
+#     #S = np.einsum('pi,qi,i->pq', ao, ao, weights)
+#     S = newmol.intor('int1e_ovlp')
+#     dS_da = np.zeros((nexp, newmol.nao, newmol.nao))
+
+#     print(idx)
+#     for p in range(newmol.nao):
+#         for q in range(p, newmol.nao):
+#             ip = idx[p]
+#             iq = idx[q]
+#             iatp = centers[p]
+#             iatq = centers[q]
+#             r2p = distances[iatp]
+#             r2q = distances[iatq]
+#             ao_ao_w = np.einsum('i,i,i->i', ao[p], ao[q], weights)
+#             ip_p_q = np.einsum('i,i->', ao_ao_w, r2p)
+#             iq_p_q = np.einsum('i,i->', ao_ao_w, r2q)
+#             dS_da[ip, p, q] += ip_p_q
+#             dS_da[iq, p, q] += iq_p_q
+#             if p != q:
+#                 dS_da[ip, q, p] += ip_p_q
+#                 dS_da[iq, q, p] += iq_p_q
+#     print(f'dS_da done: {time.perf_counter()-start:f} s')
+
+#     c = spl.solve(S, w, assume_a='pos')
+#     part1 = np.einsum('p,ip->i',  c, dw_da)
+#     part2 = np.einsum('p,ipq,q->i',  c, dS_da, c)
+#     dE_da = 2.0*part1 - part2
+#     E = self - c@w
+#     print(f'rest done: {time.perf_counter()-start:f} s')
+#     return E, dE_da
+
+import time
 def gradient_mol(nexp, newbasis, moldata):
     """Compute loss function and gradient for one molecule with respect to basis exponents.
 
@@ -45,6 +116,8 @@ def gradient_mol(nexp, newbasis, moldata):
         - E (float): Loss function value.
         - dE_da (numpy.ndarray): Gradient of loss function with respect to exponents.
     """
+    #start = time.perf_counter()
+
     mol       = moldata['mol'      ]
     rho       = moldata['rho'      ]
     coords    = moldata['coords'   ]
@@ -56,43 +129,98 @@ def gradient_mol(nexp, newbasis, moldata):
 
     newmol = df.make_auxmol(mol, newbasis)
     ao = dft.numint.eval_ao(newmol, coords).T
-
+    #S = np.einsum('i,pi,qi->pq', weights, ao, ao)
+    #S = (ao*weights)@ao.T
+    S = newmol.intor('int1e_ovlp')
     w = np.einsum('i,i,pi->p', rho, weights, ao)
-    dw_da = np.zeros((nexp, newmol.nao))
-    for p in range(newmol.nao):
-        iat = centers[p]
-        r2 = distances[iat]
-        dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights)
-
-    #S = np.einsum('pi,qi,i->pq', ao, ao, weights)
-    S = newmol.intor('int1e')
-    dS_da = np.zeros((nexp, newmol.nao, newmol.nao))
-
-    for p in range(newmol.nao):
-        for q in range(p, newmol.nao):
-            ip = idx[p]
-            iq = idx[q]
-            iatp = centers[p]
-            iatq = centers[q]
-            r2p = distances[iatp]
-            r2q = distances[iatq]
-            ao_ao_w = np.einsum('i,i,i->i', ao[p], ao[q], weights)
-            ip_p_q = np.einsum('i,i->', ao_ao_w, r2p)
-            iq_p_q = np.einsum('i,i->', ao_ao_w, r2q)
-            dS_da[ip, p, q] += ip_p_q
-            dS_da[iq, p, q] += iq_p_q
-            if p != q:
-                dS_da[ip, q, p] += ip_p_q
-                dS_da[iq, q, p] += iq_p_q
-
-    c = np.linalg.solve(S, w, assume_a='pos')
-    part1 = np.einsum('p,ip->i',  c, dw_da)
-    part2 = np.einsum('p,ipq,q->i',  c, dS_da, c)
-    dE_da = 2.0*part1 - part2
+    c = spl.solve(S, w, assume_a='pos')
     E = self - c@w
+    del S, w
+    #print(f'energy done: {time.perf_counter()-start:f} s')
+
+
+    # ## the next block can also be expressed as:
+    # ao_mapper = np.zeros(newmol.nao, nexp)
+    # ao_mapper[ np.arange(newmol.nao), idx] = 1
+    # dE_da = np.einsum('i,i,pi,pi,p,pc->c'
+    #     rho, weights,
+    #     distances[centers], ao,
+    #     2*c,
+    #     ao_mapper
+    # )
+    aoc_temp = ao * c[:, None]
+    aoc_per_e = np.zeros((nexp, rho.shape[0]))
+    for p,aoc in enumerate(aoc_temp):
+        aoc *= distances[centers[p]]
+        aoc_per_e[idx[p]] += aoc
+    dE_da = 2* (aoc_per_e @ (rho*weights))
+    del aoc_temp, aoc_per_e
+    #print(f'dw part done: {time.perf_counter()-start:f} s')
+
+    # ## check with original impl.
+    # dw_da = np.zeros((nexp, newmol.nao))
+    # for p in range(newmol.nao):
+    #     iat = centers[p]
+    #     r2 = distances[iat]
+    #     dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights)
+    # assert np.allclose(dE_da, 2*dw_da@c)
+    
+    
+    # ## the next block can also be expressed as:
+    # temp = np.einsum('i,pi,qi,p,q,pi->pq',
+    #     weights, ao, ao, c, c,
+    #     distances[centers],
+    # )
+    # temp += temp.T
+    # temp.sum(axis=0)
+    # dE_da -= temp @ ao_mapper  # dE_da -= c@dS_da@c
+    # ## alternatively
+    # dE_da -= np.einsum('i,pi,qi,p,q,pi,pc->c',
+    #     weights, ao, ao, c, c,
+    #     distances[centers],
+    #     2*ao_mapper,
+    # )
+
+    aoc = c[:, None] * ao
+    wao = aoc.sum(axis=0)
+    wao *= weights
+
+    mapped = np.zeros((nexp, rho.shape[0]))
+    for p,aoc_slice in enumerate(aoc):
+        mapped[idx[p]] += aoc_slice * distances[centers[p]]
+    mapped *= wao
+
+    # ## check with original impl.
+    # dS_da = np.zeros((nexp, newmol.nao, newmol.nao))
+    # for p in range(newmol.nao):
+    #     for q in range(p, newmol.nao):
+    #         ip = idx[p]
+    #         iq = idx[q]
+    #         iatp = centers[p]
+    #         iatq = centers[q]
+    #         r2p = distances[iatp]
+    #         r2q = distances[iatq]
+    #         ao_ao_w = np.einsum('i,i,i->i', ao[p], ao[q], weights)
+    #         ip_p_q = np.einsum('i,i->', ao_ao_w, r2p)
+    #         iq_p_q = np.einsum('i,i->', ao_ao_w, r2q)
+    #         dS_da[ip, p, q] += ip_p_q
+    #         dS_da[iq, p, q] += iq_p_q
+    #         if p != q:
+    #             dS_da[ip, q, p] += ip_p_q
+    #             dS_da[iq, q, p] += iq_p_q
+    # comparison = (dS_da@c)@c
+    # assert np.allclose(2*mapped.sum(axis=1), comparison)
+
+
+    
+    dE_da -= 2*mapped.sum(axis=1)
+    del mapped, wao, aoc
+        
+    #print(f'dS part done: {time.perf_counter()-start:f} s')
     return E, dE_da
 
 
+
 def exp2basis(exponents, elements, basis):
     """Convert exponents array to basis set format.
 
diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py
index 9b02c27..fa06242 100644
--- a/qstack/basis_opt/opt.py
+++ b/qstack/basis_opt/opt.py
@@ -1,6 +1,6 @@
 """Basis set optimization routines and command-line interface."""
 
-import sys
+import sys, time
 from ast import literal_eval
 import argparse
 import numpy as np
@@ -39,6 +39,7 @@ def energy(x):
         Returns:
             float: Loss function value.
         """
+        start = time.perf_counter()
         exponents = np.exp(x)
         newbasis = qbbt.exp2basis(exponents, myelements, basis)
         E = sum(runner(
@@ -46,7 +47,7 @@ def energy(x):
             for m in moldata
         ), start=0.0)
         if printlvl>=2:
-            print("energy complete:", E, flush=True)
+            print(f"energy complete: {E:g} (took {time.perf_counter()-start:f} s)", flush=True)
         elif printlvl>=1:
             print(end="", flush=True)
         return E
@@ -62,6 +63,7 @@ def gradient(x):
             - E (float): Loss function value.
             - dE_dx (numpy.ndarray): Gradient with respect to log(exponents).
         """
+        gradstart = time.perf_counter()
         exponents = np.exp(x)
         newbasis = qbbt.exp2basis(exponents, myelements, basis)
 
@@ -69,17 +71,18 @@ def gradient(x):
         dE_da = np.zeros(nexp)
         #for m in moldata:
         def minirun(m):
+            start = time.perf_counter()
             E_, dE_da_ = qbbt.gradient_mol(nexp, newbasis, m)
             if printlvl>=2:
-                print('1-compound gradient: e =', E_, '(', E_/m['self']*100.0, '%)')
+                print(f"1-compound {(m['mol'].nao, m['rho'].shape)!r} gradient: err={E_:g} (rel: {E_/m['self']:g}) (took {time.perf_counter()-start:f} s)", flush=True)
             return E_, dE_da_
-        runs = runner(delayed(minirun)(m) for m in moldata)
+        runs = runner(joblib.delayed(minirun)(m) for m in moldata)
         for E_, dE_da_ in runs:
             E += E_
             dE_da += dE_da_
 
         if printlvl>=2:
-            print("gradient complete:", E, max(abs(dE_da)))
+            print(f"gradient complete: {E:g} (took {time.perf_counter()-gradstart:f}s)", E, max(abs(dE_da)))
         dE_da = qbbt.cut_myelements(dE_da, myelements, bf_bounds)
 
         if printlvl>=2:

From 80468fb4e670d16c74271cd2e48e783cc8e779da Mon Sep 17 00:00:00 2001
From: Liam Marsh <liam.marsh@epfl.ch>
Date: Mon, 23 Mar 2026 11:27:22 +0100
Subject: [PATCH 4/6] fix callability of basis optimisation, and added nwchem
 basis set output

---
 qstack/basis_opt/__main__.py    | 54 +++++++++++++++++++++++++++++++++
 qstack/basis_opt/basis_tools.py | 51 +++++++++++++++++++++++++++++--
 qstack/basis_opt/opt.py         |  8 +++--
 3 files changed, 108 insertions(+), 5 deletions(-)
 create mode 100644 qstack/basis_opt/__main__.py

diff --git a/qstack/basis_opt/__main__.py b/qstack/basis_opt/__main__.py
new file mode 100644
index 0000000..d46357b
--- /dev/null
+++ b/qstack/basis_opt/__main__.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+
+"""
+The command-line launcher for the basis set optimisation function of qstack.basis_opt.opt
+
+(can be called as `python3 -m qstack.basis_opt`, and has a cleaner import chain than `python3 -m qstack.basis_opt.opt`)
+"""
+
+import sys, argparse
+from . import basis_tools as qbbt
+from .opt import optimize_basis
+
+
+def _get_arg_parser():
+    """Parse CLI arguments."""
+    parser = argparse.ArgumentParser(description='Optimize a density fitting basis set.')
+    parser.add_argument('--elements',  type=str,   dest='elements',  nargs='+',    help='elements for optimization')
+    parser.add_argument('--basis',     type=str,   dest='basis',     nargs='+',    help='initial df bases', required=True)
+    parser.add_argument('--molecules', type=str,   dest='molecules', nargs='+',    help='molecules', required=True)
+    parser.add_argument('--gtol',      type=float, dest='gtol',      default=1e-7, help='tolerance')
+    parser.add_argument('--method',    type=str,   dest='method',    default='CG', help='minimization algoritm')
+    parser.add_argument('--print',     type=int,   dest='print',     default=2,    help='printing level')
+    parser.add_argument('--check', action='store_true', dest='check', default=False, help='check the gradient and exit')
+    parser.add_argument('--output',    type=str,   dest='output',    default=None, help='optional file to write the optimised basis set to (nwchem format, which pyscf can read)')
+    return parser
+
+
+def main():
+    """Run basis set optimization via command-line interface."""
+    args = _get_arg_parser().parse_args()
+
+    result = optimize_basis(args.elements, args.basis, args.molecules, args.gtol, args.method, check=args.check, printlvl=args.print)
+    if args.check is False:
+        if args.print==0:
+            qbbt.printbasis(result, sys.stdout)
+        if args.output is not None:
+           with open(args.output, 'w') as f:
+               qbbt.basis_as_nwchem(f, result, '[placeholder name for a qstack-optimised basis set]')
+    else:
+        gr_an, gr_num, gr_diff = result['an'], result['num'], result['diff']
+        print('analytical gradient')
+        print(gr_an)
+        print('numerical gradient')
+        print(gr_num)
+        print('difference')
+        print(gr_diff)
+        print('relative difference')
+        print(gr_diff/gr_num)
+        print(flush=True)
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py
index c703fd1..df376b9 100644
--- a/qstack/basis_opt/basis_tools.py
+++ b/qstack/basis_opt/basis_tools.py
@@ -4,6 +4,7 @@
 import numpy as np
 import scipy.linalg as spl
 from pyscf import df, dft
+import pyscf
 
 
 def energy_mol(newbasis, moldata):
@@ -268,11 +269,57 @@ def printbasis(basis, f):
         f (file): File object to write to.
     """
     print('{', file=f)
-    for q, b in basis.items():
+    for i, (q, b) in enumerate(basis.items()):
         print('  "'+q+'": [', file=f)
         for i, gto in enumerate(b):
             if i > 0:
                 print(',', file=f)
             print('   ', gto, file=f, end='')
-        print('  ]', file=f)
+        if i==len(basis)-1:
+            print('\n  ]', file=f)
+        else:
+            print('\n  ],', file=f)
     print('}', file=f)
+
+
+def basis_as_nwchem(f_out, basis_dict, name='custom basis set'):
+    """Print basis set in NWchem format (the one pyscf reads from files)
+
+    Prints a basis dictionnary into a provided file. The name of the basis, as indicated in the file itself, can be provided.
+
+    Args:
+        f_out (writable io.TextIOBase): the file to write the basis to
+        basis_dict (pyscf-format basis dictionnary): the basis to write
+        name (str): the optional name of the basis
+    """
+
+    sorted_atom_types = sorted(basis_dict.keys(), key=pyscf.data.elements.charge)
+    angular_names = 'spdfgh'
+
+    f_out.write(f"# {name:s}\n# (custom basis written by qstack)\n# supported elements: {', '.join(sorted_atom_types):s}\n\n")
+    f_out.write("BASIS \"ao basis\" PRINT")
+    for atom in sorted_atom_types:
+        fakemol = pyscf.M(atom=atom, charge=pyscf.data.elements.charge(atom))
+        nbas = fakemol.nbas
+        l_count = max(fakemol.bas_angular(x) for x in range(nbas))+1
+
+        prim_count=[0]*l_count
+        shell_count=[0]*l_count
+        for bas_i in range(nbas):
+            l = fakemol.bas_angular(bas_i)
+            prim_count[l] += fakemol.bas_nprim(bas_i)
+            shell_count[l] += fakemol.bas_nctr(bas_i)
+        prim_mark = [f"{n:d}{angular_names[l]:s}" for l,n in enumerate(prim_count) ]
+        shell_mark = [f"{n:d}{angular_names[l]:s}" for l,n in enumerate(shell_count) ]
+
+        f_out.write(f'#BASIS SET: ({",".join(prim_mark):s}) -> [{",".join(shell_mark):s}]\n')
+
+        for bas_i in range(nbas):
+            l = fakemol.bas_angular(bas_i)
+            f_out.write(f"{atom:<2s}    {angular_names[l].upper():s}\n")
+            exps = fakemol.bas_exp(bas_i)
+            coeffs = fakemol.bas_ctr_coeff(bas_i)
+            for exp, exp_coeff in zip(exps,coeffs):
+                fmt = "{:>15.7G}" + "        {:>15.7G}"*len(exp_coeff) + "\n"
+                f_out.write(fmt.format(exp, *exp_coeff).replace('E','D'))
+    f_out.write('END\n')
diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py
index fa06242..c82d4e7 100644
--- a/qstack/basis_opt/opt.py
+++ b/qstack/basis_opt/opt.py
@@ -42,10 +42,12 @@ def energy(x):
         start = time.perf_counter()
         exponents = np.exp(x)
         newbasis = qbbt.exp2basis(exponents, myelements, basis)
-        E = sum(runner(
+        E = 0.0
+        for value in runner(
             joblib.delayed(qbbt.energy_mol)(newbasis, m)
             for m in moldata
-        ), start=0.0)
+        ):
+            E += value
         if printlvl>=2:
             print(f"energy complete: {E:g} (took {time.perf_counter()-start:f} s)", flush=True)
         elif printlvl>=1:
@@ -73,7 +75,7 @@ def gradient(x):
         def minirun(m):
             start = time.perf_counter()
             E_, dE_da_ = qbbt.gradient_mol(nexp, newbasis, m)
-            if printlvl>=2:
+            if printlvl>=3:
                 print(f"1-compound {(m['mol'].nao, m['rho'].shape)!r} gradient: err={E_:g} (rel: {E_/m['self']:g}) (took {time.perf_counter()-start:f} s)", flush=True)
             return E_, dE_da_
         runs = runner(joblib.delayed(minirun)(m) for m in moldata)

From 64bd60e3096c924257ec735f86010509986a5321 Mon Sep 17 00:00:00 2001
From: Liam Marsh <liam.marsh@epfl.ch>
Date: Mon, 23 Mar 2026 11:44:40 +0100
Subject: [PATCH 5/6] fix regular printing and test

---
 qstack/basis_opt/basis_tools.py |  4 ++--
 qstack/basis_opt/opt.py         | 28 ++++++++++++++++++++--------
 tests/test_opt.py               | 33 ++++++++++++++++++++++++++++++++-
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py
index df376b9..483404c 100644
--- a/qstack/basis_opt/basis_tools.py
+++ b/qstack/basis_opt/basis_tools.py
@@ -269,13 +269,13 @@ def printbasis(basis, f):
         f (file): File object to write to.
     """
     print('{', file=f)
-    for i, (q, b) in enumerate(basis.items()):
+    for atom_i, (q, b) in enumerate(basis.items()):
         print('  "'+q+'": [', file=f)
         for i, gto in enumerate(b):
             if i > 0:
                 print(',', file=f)
             print('   ', gto, file=f, end='')
-        if i==len(basis)-1:
+        if atom_i==len(basis)-1:
             print('\n  ]', file=f)
         else:
             print('\n  ],', file=f)
diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py
index c82d4e7..d7fdaa3 100644
--- a/qstack/basis_opt/opt.py
+++ b/qstack/basis_opt/opt.py
@@ -9,7 +9,10 @@
 import pyscf.data
 from ..compound import basis_flatten
 from . import basis_tools as qbbt
-import joblib
+try:
+    import joblib
+except ImportError:
+    joblib = None
 
 def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in="CG", printlvl=2, check=False):
     """Optimize a given basis set.
@@ -28,7 +31,8 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in=
 
     """
 
-    runner = joblib.Parallel(n_jobs=-1, return_as="generator_unordered")
+    if joblib is not None:
+        runner = joblib.Parallel(n_jobs=-1, return_as="generator_unordered")
 
     def energy(x):
         """Compute total loss function (fitting error) for given exponents.
@@ -43,11 +47,15 @@ def energy(x):
         exponents = np.exp(x)
         newbasis = qbbt.exp2basis(exponents, myelements, basis)
         E = 0.0
-        for value in runner(
-            joblib.delayed(qbbt.energy_mol)(newbasis, m)
-            for m in moldata
-        ):
-            E += value
+        if joblib is not None:
+            for value in runner(
+                joblib.delayed(qbbt.energy_mol)(newbasis, m)
+                for m in moldata
+            ):
+                E += value
+        else:
+            for m in moldata:
+                E += qbbt.energy_mol(newbasis,m)
         if printlvl>=2:
             print(f"energy complete: {E:g} (took {time.perf_counter()-start:f} s)", flush=True)
         elif printlvl>=1:
@@ -78,7 +86,11 @@ def minirun(m):
             if printlvl>=3:
                 print(f"1-compound {(m['mol'].nao, m['rho'].shape)!r} gradient: err={E_:g} (rel: {E_/m['self']:g}) (took {time.perf_counter()-start:f} s)", flush=True)
             return E_, dE_da_
-        runs = runner(joblib.delayed(minirun)(m) for m in moldata)
+
+        if joblib is not None:
+            runs = runner(joblib.delayed(minirun)(m) for m in moldata)
+        else:
+            runs = (minirun(m) for m in moldata)
         for E_, dE_da_ in runs:
             E += E_
             dE_da += dE_da_
diff --git a/tests/test_opt.py b/tests/test_opt.py
index f5f216b..10fb46c 100755
--- a/tests/test_opt.py
+++ b/tests/test_opt.py
@@ -16,7 +16,38 @@ def test_hf_otpd():
     g  = basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt', path+'/data/initial/O_N0.txt'], [mol_dict], check=True, printlvl=0)
     assert (np.all(g['diff'] < 1e-6))
 
-    ob_good = {'H': [[0, [42.30256758622713, 1]], [0, [6.83662718701579, 1]], [0, [1.8547192742478775, 1]], [0, [0.3797283290452742, 1]], [1, [12.961663119622536, 1]], [1, [2.507400755551906, 1]], [1, [0.6648804678758861, 1]], [2, [3.482167705165484, 1]], [2, [0.6053728887614225, 1]], [3, [0.6284190712545101, 1]]]}
+    # # with the grid-approximated self-overlap
+    # ob_good = {
+    #   'H': [
+    #     [0, [42.30256758622713, 1]],
+    #     [0, [6.83662718701579, 1]],
+    #     [0, [1.8547192742478775, 1]],
+    #     [0, [0.3797283290452742, 1]],
+    #     [1, [12.961663119622536, 1]],
+    #     [1, [2.507400755551906, 1]],
+    #     [1, [0.6648804678758861, 1]],
+    #     [2, [3.482167705165484, 1]],
+    #     [2, [0.6053728887614225, 1]],
+    #     [3, [0.6284190712545101, 1]]
+    #   ],
+    # }
+
+    # # with the true self-overlap
+    ob_good = {
+      "H": [
+        [0, [42.30282217079675, 1]],
+        [0, [6.836275058516526, 1]],
+        [0, [1.8548659895984756, 1]],
+        [0, [0.37973118342659834, 1]],
+        [1, [12.961642133527626, 1]],
+        [1, [2.5073719856017003, 1]],
+        [1, [0.664871596194868, 1]],
+        [2, [3.4821895465370454, 1]],
+        [2, [0.6053775933231458, 1]],
+        [3, [0.6284190862766469, 1]]
+      ],
+    }
+
     ob = basis_opt.opt.optimize_basis(['H'], [path+'/data/initial/H_N0.txt'], [path+'/data/H2.ccpvtz.grid3.npz'], printlvl=2, gtol_in=1e-5)
     for [_l,[a,_c]], [_l1,[a1,_c1]] in zip(ob_good['H'], ob['H'], strict=True):
         assert (abs(a-a1)<1e-5)

From 8084acd3f725e4cbd01f77ddae6a445bb1831ddd Mon Sep 17 00:00:00 2001
From: Liam Marsh <liam.marsh@epfl.ch>
Date: Mon, 23 Mar 2026 11:59:36 +0100
Subject: [PATCH 6/6] chore: make ruff happy

---
 docs/generate_rst.py            |  5 ++---
 qstack/basis_opt/__main__.py    |  6 +++---
 qstack/basis_opt/basis_tools.py | 14 ++++++--------
 qstack/basis_opt/opt.py         |  4 ++--
 tests/test_opt.py               |  2 +-
 5 files changed, 14 insertions(+), 17 deletions(-)
 mode change 100644 => 100755 qstack/basis_opt/__main__.py

diff --git a/docs/generate_rst.py b/docs/generate_rst.py
index ce15334..ed9b8ba 100755
--- a/docs/generate_rst.py
+++ b/docs/generate_rst.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
-"""
-Generate Sphinx-ready .rst files
+"""Generate Sphinx-ready .rst files
 
 Used instead of Sphinx to avoid importing broken or expensive dependecies
 
@@ -147,7 +146,7 @@ def extract_module_info(py_path: Path, module_name: str) -> ModuleInfo:
     try:
         tree = ast.parse(src)
     except SyntaxError:
-        return ModuleInfo(module_name, py_path, None, [], [], None, [],)
+        return ModuleInfo(module_name, py_path, None, [], [], None, [])
 
     mdoc = ast.get_docstring(tree)
     classes: list[ClassInfo] = []
diff --git a/qstack/basis_opt/__main__.py b/qstack/basis_opt/__main__.py
old mode 100644
new mode 100755
index d46357b..5a2df8e
--- a/qstack/basis_opt/__main__.py
+++ b/qstack/basis_opt/__main__.py
@@ -1,12 +1,12 @@
 #!/usr/bin/env python3
 
-"""
-The command-line launcher for the basis set optimisation function of qstack.basis_opt.opt
+"""The command-line launcher for the basis set optimisation function of `qstack.basis_opt.opt`.
 
 (can be called as `python3 -m qstack.basis_opt`, and has a cleaner import chain than `python3 -m qstack.basis_opt.opt`)
 """
 
-import sys, argparse
+import sys
+import argparse
 from . import basis_tools as qbbt
 from .opt import optimize_basis
 
diff --git a/qstack/basis_opt/basis_tools.py b/qstack/basis_opt/basis_tools.py
index 483404c..96cd909 100644
--- a/qstack/basis_opt/basis_tools.py
+++ b/qstack/basis_opt/basis_tools.py
@@ -103,7 +103,6 @@ def energy_mol(newbasis, moldata):
 #     print(f'rest done: {time.perf_counter()-start:f} s')
 #     return E, dE_da
 
-import time
 def gradient_mol(nexp, newbasis, moldata):
     """Compute loss function and gradient for one molecule with respect to basis exponents.
 
@@ -165,8 +164,8 @@ def gradient_mol(nexp, newbasis, moldata):
     #     r2 = distances[iat]
     #     dw_da[idx[p], p] = np.einsum('i,i,i,i->', ao[p], rho, r2, weights)
     # assert np.allclose(dE_da, 2*dw_da@c)
-    
-    
+
+
     # ## the next block can also be expressed as:
     # temp = np.einsum('i,pi,qi,p,q,pi->pq',
     #     weights, ao, ao, c, c,
@@ -213,10 +212,10 @@ def gradient_mol(nexp, newbasis, moldata):
     # assert np.allclose(2*mapped.sum(axis=1), comparison)
 
 
-    
+
     dE_da -= 2*mapped.sum(axis=1)
     del mapped, wao, aoc
-        
+
     #print(f'dS part done: {time.perf_counter()-start:f} s')
     return E, dE_da
 
@@ -283,7 +282,7 @@ def printbasis(basis, f):
 
 
 def basis_as_nwchem(f_out, basis_dict, name='custom basis set'):
-    """Print basis set in NWchem format (the one pyscf reads from files)
+    """Print basis set in NWchem format (the one pyscf reads from files).
 
     Prints a basis dictionnary into a provided file. The name of the basis, as indicated in the file itself, can be provided.
 
@@ -292,7 +291,6 @@ def basis_as_nwchem(f_out, basis_dict, name='custom basis set'):
         basis_dict (pyscf-format basis dictionnary): the basis to write
         name (str): the optional name of the basis
     """
-
     sorted_atom_types = sorted(basis_dict.keys(), key=pyscf.data.elements.charge)
     angular_names = 'spdfgh'
 
@@ -319,7 +317,7 @@ def basis_as_nwchem(f_out, basis_dict, name='custom basis set'):
             f_out.write(f"{atom:<2s}    {angular_names[l].upper():s}\n")
             exps = fakemol.bas_exp(bas_i)
             coeffs = fakemol.bas_ctr_coeff(bas_i)
-            for exp, exp_coeff in zip(exps,coeffs):
+            for exp, exp_coeff in zip(exps, coeffs, strict=True):
                 fmt = "{:>15.7G}" + "        {:>15.7G}"*len(exp_coeff) + "\n"
                 f_out.write(fmt.format(exp, *exp_coeff).replace('E','D'))
     f_out.write('END\n')
diff --git a/qstack/basis_opt/opt.py b/qstack/basis_opt/opt.py
index d7fdaa3..aa065d0 100644
--- a/qstack/basis_opt/opt.py
+++ b/qstack/basis_opt/opt.py
@@ -1,6 +1,7 @@
 """Basis set optimization routines and command-line interface."""
 
-import sys, time
+import sys
+import time
 from ast import literal_eval
 import argparse
 import numpy as np
@@ -30,7 +31,6 @@ def optimize_basis(elements_in, basis_in, molecules_in, gtol_in=1e-7, method_in=
         Dictionary containing the optimized basis.
 
     """
-
     if joblib is not None:
         runner = joblib.Parallel(n_jobs=-1, return_as="generator_unordered")
 
diff --git a/tests/test_opt.py b/tests/test_opt.py
index 10fb46c..2fb4a1b 100755
--- a/tests/test_opt.py
+++ b/tests/test_opt.py
@@ -44,7 +44,7 @@ def test_hf_otpd():
         [1, [0.664871596194868, 1]],
         [2, [3.4821895465370454, 1]],
         [2, [0.6053775933231458, 1]],
-        [3, [0.6284190862766469, 1]]
+        [3, [0.6284190862766469, 1]],
       ],
     }