"""Functions for building RDKit molecules.
Such functions include:
- Utilities to import molecules from one package to RDKit
- Determine the connectivity of the molecule from XYZ coordinates using RDKit or OpenBabel
- Determining the bond orders of a molecule using RDKit, OpenBabel, or MDAnalysis
"""
import copy
import warnings
import itertools
import os
import numpy as np
from deepdiff import DeepDiff
from qcelemental.physical_constants import constants
from rdkit import Chem
from rdkit.Geometry import Point3D
from rdkit.Chem import GetPeriodicTable
from rdkit.Chem.rdDetermineBonds import DetermineBondOrders
from rdkit import RDLogger
import periodictable
from MDAnalysis.converters.RDKitInferring import MDAnalysisInferrer
from openbabel import openbabel as ob
from .utils import first_traceback, get_molecular_formula
from .graph_mapping import (
find_atom_mapping,
implicit_hydrogen_atom_mapping,
find_molecular_rings,
mol_to_graph,
)
from .reference_values import (
bond_order_dict,
bond_type_dict,
transition_metal_covalent_radii,
METALS_NUM,
)
# Suppress all OpenBabel output including stereochemistry errors
ob.obErrorLog.SetOutputLevel(0)
ob.obErrorLog.StopLogging()
os.environ["BABEL_SILENCE"] = "1"
RDLogger.DisableLog("rdApp.*")
pt = GetPeriodicTable()
#############################################################################
############################## Atom Assessment ##############################
#############################################################################
[docs]
def get_atom_charge(atom, ignore_multiple_charges=True, use_formal_charge=False):
"""Get the effective charge of an atom based on its default valence and total bond orders.
Parameters
----------
atom : rdkit.Chem.rdchem.Atom
RDKit atom to assess.
ignore_multiple_charges : bool, optional, default=True
If False, raises an error when multiple possible charges are encountered;
otherwise, selects the lowest charge. This is relevant for atoms like sulfur
that can have multiple charge states.
use_formal_charge : bool, optional, default=False
Whether to use the atomic formal charge. If True, the output is the number
of hanging bonds (negative for hanging, positive for excess). Either may
indicate that the formal charge of the atom should be updated.
Returns
-------
float
The formal charge of the atom. May be fractional if aromatics are involved.
If `use_formal_charge` is True, returns the number of excess bonds.
Raises
------
ValueError
If the atom could have multiple charge states given the bonding orders and
`ignore_multiple_charges` is False.
Notes
-----
If multiple possible charges are found and `ignore_multiple_charges` is True,
the lowest charge is selected.
"""
Ntotalbonds = np.array(pt.GetValenceList(atom.GetAtomicNum()))
if Ntotalbonds[0] == -1:
Ntotalbonds = np.array([0])
bond_orders = [bond_order_dict[b.GetBondType().name] for b in atom.GetBonds()]
charge = sum(bond_orders) - Ntotalbonds
if use_formal_charge:
charge -= atom.GetFormalCharge()
charge = charge[np.where(np.min(np.abs(charge)) == np.abs(charge))[0]]
if len(charge) > 1 and not ignore_multiple_charges:
if atom.HasProp("__original_index"):
raise ValueError(
f"Atom {atom.GetSymbol()}, Ligand Index: {atom.GetIdx()}, Complex Index: "
f"{atom.GetIntProp('__original_index')}, can have multiple charge states {charge}"
)
else:
raise ValueError(
f"Atom {atom.GetSymbol()}, Ligand Index: {atom.GetIdx()} "
f"can have multiple charge states {charge}"
)
min_charge = min(charge)
if abs(min_charge - round(min_charge)) < np.finfo(float).eps:
return int(round(min_charge))
return min_charge
[docs]
def assess_atoms(mol, add_atom=None, use_formal_charge=False):
"""Assess an RDKit molecule's atoms to determine which are charged or not fully satisfied.
Parameters
----------
mol : rdkit.Chem.Mol
RDKit molecule.
add_atom : str, optional
If provided, the number of hanging bonds will be the number of this atom type present.
use_formal_charge : bool, optional
Whether to use the atomic formal charge in :func:`get_atom_charge`.
Returns
-------
charge : float
Total charge of the molecule.
hanging_bonds : int
Number of hanging bonds to contribute toward the oxidation state of the metal.
charged_atoms : dict
Detailed information on atoms showing a charge or not satisfied with full bonding.
Each entry contains:
symbol : str
Atomic symbol.
charge : int
Formal charge of the atom as defined by :func:`get_atom_charge`.
bond_orders : list of tuple
List of bonds to this atom. Each bond is represented by a tuple:
bond_type : str
rdkit.Chem.BondType.name
bond_order : int
Custom integer value representing the bond type (DATIVE=0).
BeginAtom_Symbol : str
Atomic symbol of the "BeginAtom".
BeginAtomIdx : int
Molecule index of the "BeginAtom".
EndAtom_Symbol : str
Atomic symbol of the "EndAtom".
EndAtomIdx : int
Molecule index of the "EndAtom".
"""
if mol is None:
raise ValueError("Molecule is None")
mol = copy.deepcopy(mol)
charged_atoms = {}
total_charge = 0
hanging_bonds = 0
if add_atom is not None:
atoms_to_remove = [
a.GetIdx()
for a in mol.GetAtoms()
if a.GetSymbol() == add_atom and not a.HasProp("__original_index")
]
for idx in sorted(atoms_to_remove, reverse=True):
mol.RemoveAtom(idx)
for a in mol.GetAtoms():
charge = get_atom_charge(a)
if charge != 0:
total_charge += charge
charged_atoms[a.GetIdx()] = {
"symbol": a.GetSymbol(),
"charge": charge,
"bond_orders": [
(
b.GetBondType().name,
bond_order_dict[b.GetBondType().name],
b.GetBeginAtom().GetSymbol(),
b.GetBeginAtomIdx(),
b.GetEndAtom().GetSymbol(),
b.GetEndAtomIdx(),
)
for b in a.GetBonds()
],
}
tmp_hanging_bonds = -get_atom_charge(a, use_formal_charge=True)
if tmp_hanging_bonds > 0:
hanging_bonds += tmp_hanging_bonds
elif tmp_hanging_bonds < 0: # Excess of bonds
warnings.warn(
f"Atom {a.GetIdx()}, {a.GetSymbol()}, should have a +{-tmp_hanging_bonds} charge."
)
elif tmp_hanging_bonds % 1 > np.finfo(float).eps:
warnings.warn("Hanging bond is not an integer value")
return total_charge, int(hanging_bonds), charged_atoms
[docs]
def copy_atom_coords(mol1, index1, mol2, index2, confId1=0, confId2=0):
"""Copy the coordinates of one atom to another
Parameters
----------
mol1 : rdkit.Chem.Mol
RDKit target molecule
index1 : int
Index of target atom
mol2 : rdkit.Chem.Mol
RDKit reference molecule
index2 : int
Index of reference atom
confId : int, optional, default=0
Conformer index
"""
conf1 = mol1.GetConformer(confId1)
conf2 = mol2.GetConformer(confId2)
pos = conf2.GetAtomPosition(index2) # returns Point3D
conf1.SetAtomPosition(index1, Point3D(pos.x, pos.y, pos.z))
#############################################################################
########################### Conversion Functions ############################
#############################################################################
[docs]
def qcelemental_to_rdkit(qcel_molecule, use_connectivity=True, distance_tolerance=0.1):
"""
Convert a QCElemental molecule to an RDKit molecule.
Parameters:
-----------
qcel_molecule : qcelemental.models.Molecule
The QCElemental molecule object
use_connectivity : bool
Whether to use existing connectivity information if available
distance_tolerance : float, optional, default=0.1
Additional tolerance for bond distance cutoffs (Angstroms) in :func:`determine_connectivity`
Returns:
--------
rdkit.Chem.Mol
RDKit molecule object
"""
mol = Chem.RWMol()
for i, symbol in enumerate(qcel_molecule.symbols):
atom = Chem.Atom(symbol)
atom.SetNoImplicit(True)
mol.AddAtom(atom)
mol.UpdatePropertyCache(strict=False)
# Set 3D coordinates
if qcel_molecule.geometry is not None:
coords = qcel_molecule.geometry.reshape(-1, 3) * constants.bohr2angstroms
conformer = Chem.Conformer(len(qcel_molecule.symbols))
for i, (x, y, z) in enumerate(coords):
conformer.SetAtomPosition(i, Point3D(x, y, z))
mol.AddConformer(conformer)
# Add bonds if connectivity is provided and requested
if (
use_connectivity
and hasattr(qcel_molecule, "connectivity")
and qcel_molecule.connectivity is not None
):
for bond in qcel_molecule.connectivity:
atom1_idx, atom2_idx, bond_order = bond
if bond_order not in bond_type_dict:
bond_order = 1
mol.AddBond(atom1_idx, atom2_idx, bond_type_dict[bond_order])
else:
mol = determine_connectivity(mol, distance_tolerance=distance_tolerance)
mol.UpdatePropertyCache(strict=False)
mol = mol.GetMol()
try:
Chem.SanitizeMol(mol)
except Exception: # Doesn't always work for metal complexes
pass
return mol
[docs]
def xyz_to_rdkit(symbols, positions, distance_tolerance=0.1, ignore_scale=False):
"""
Convert a QCElemental molecule to an RDKit molecule.
Parameters:
-----------
symbols : list[str]
List of element symbols
positions : numpy.ndarray
Matrix of the same length as ``symbols`` with x, y, and z coordinates in Angstroms
distance_tolerance : float, optional, default=0.1
Additional tolerance for bond distance cutoffs (Angstroms) in :func:`determine_connectivity`
ignore_scale : bool, optional, default=False
If True avoid an error when "H" is present and the minimum atomic distance is not between
0.8 Å and 1.5 Å.
Returns:
--------
rdkit.Chem.Mol
RDKit molecule object
"""
if len(symbols) != len(positions):
raise ValueError(
"Number of provided elements does not match the number of provided positions."
)
distances = np.linalg.norm(positions[:, None, :] - positions[None, :, :], axis=-1)
distances[np.where(distances == 0)] = np.nan
if np.sum(np.isnan(distances)) != len(symbols):
raise ValueError("Atoms are overlapping")
if (
"H" in symbols
and not ignore_scale
and (0.8 >= np.nanmin(distances) or np.nanmin(distances) >= 1.5)
):
raise ValueError(
"Minimum distance is not in the range of a bonded hydrogen; a unit conversion may be required. If this is intentional, consider setting `ignore_scale=True`"
)
mol = Chem.RWMol()
for i, symbol in enumerate(symbols):
atom = Chem.Atom(symbol)
atom.SetNoImplicit(True)
mol.AddAtom(atom)
# Set 3D coordinates
conformer = Chem.Conformer(len(symbols))
for i, pos in enumerate(positions):
conformer.SetAtomPosition(i, Point3D(*pos))
mol.AddConformer(conformer)
mol.UpdatePropertyCache(strict=False)
mol = mol.GetMol()
mol = determine_connectivity(mol, distance_tolerance=distance_tolerance)
mol.UpdatePropertyCache(strict=False)
try:
Chem.SanitizeMol(
mol,
sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL
^ Chem.SanitizeFlags.SANITIZE_SETAROMATICITY, # Needed for porphyrins
)
except Exception: # Doesn't always work for metal complexes
pass
return mol
###############################################################################
########################### Connectivity Functions ############################
###############################################################################
[docs]
def determine_connectivity(rdkit_mol, method="hybrid", distance_tolerance=0.1):
"""
Determine connectivity for molecules, particularly transition metal organometallic complexes.
Parameters:
-----------
rdkit_mol : rdkit.Chem.Mol
RDKit molecule without bonds
method : str, optional, default='hybrid'
Method to use: 'rdkit', 'openbabel', or 'hybrid' where openbabel is attempted and rdkit
is the fall back.
distance_tolerance : float, optional, default=0.1
Additional tolerance for bond distance cutoffs (Angstroms)
Returns:
--------
rdkit.Chem.Mol
RDKit molecule with bonds added
"""
if method == "openbabel":
return _determine_connectivity_openbabel(rdkit_mol)
elif method == "rdkit":
return _determine_connectivity_rdkit(rdkit_mol, distance_tolerance)
else:
return _determine_connectivity_hybrid(rdkit_mol, distance_tolerance)
def _determine_connectivity_openbabel(rdkit_mol):
"""Use OpenBabel to determine connectivity."""
ob_mol = ob.OBMol()
ob_conv = ob.OBConversion()
ob_conv.SetInAndOutFormats("mol", "mol")
mol_block = Chem.MolToMolBlock(rdkit_mol, forceV3000=True)
ob_conv.ReadString(ob_mol, mol_block)
# Determine connectivity
ob_mol.ConnectTheDots()
ob_mol.PerceiveBondOrders()
# Convert back to RDKit
mol_block_with_bonds = ob_conv.WriteString(ob_mol)
mol_with_bonds = Chem.MolFromMolBlock(mol_block_with_bonds, sanitize=False)
return mol_with_bonds
def _get_covalent_radius(symbol, fallback_radius=1.5):
"""Get covalent radius for an element symbol using periodictable."""
try:
element = getattr(periodictable, symbol)
# periodictable stores covalent radius in pm, convert to Angstroms
if hasattr(element, "covalent_radius") and element.covalent_radius is not None:
return element.covalent_radius
elif _is_transition_metal(symbol):
return transition_metal_covalent_radii.get(symbol, fallback_radius)
else:
return fallback_radius
except AttributeError:
return fallback_radius
def _is_transition_metal(symbol):
"""Check if an element is a transition metal using periodictable."""
try:
element = getattr(periodictable, symbol)
# Transition metals have d-block elements (groups 3-12)
# Plus lanthanides and actinides (f-block)
return (
(
hasattr(element, "group")
and element.group is not None
and 3 <= element.group <= 12
)
or (57 <= element.number <= 71)
or (89 <= element.number <= 103)
)
except AttributeError:
# Fallback to hardcoded list for elements not in periodictable
return symbol in transition_metal_covalent_radii
def _determine_connectivity_rdkit(
rdkit_mol, max_distance_tolerance=0.1, min_distance_tolerance=0.5
):
"""
Determine molecular connectivity using RDKit with custom logic for metal atoms.
This function analyzes the 3D coordinates of atoms in an RDKit molecule and adds bonds between atoms
based on covalent radii and distance thresholds. It applies special rules for transition metals by
increasing the bonding threshold to better account for metal complexes.
Parameters
----------
rdkit_mol : rdkit.Chem.Mol
The input RDKit molecule, which must have at least one conformer with 3D coordinates.
max_distance_tolerance : float, optional
The maximum additional distance (in angstroms) allowed beyond the sum of covalent radii for bond formation. Default is 0.1.
min_distance_tolerance : float, optional
The minimum distance (in angstroms) below which atoms are considered too close to form a bond. Default is 0.5.
Returns
-------
rdkit.Chem.Mol
A new RDKit molecule object with bonds added according to the connectivity rules.
Raises
------
ValueError
If the input molecule does not have any conformers.
Notes
-----
- Transition metals are handled with a larger bonding threshold (scaled by 1.3).
- Atoms are considered bonded if their distance is between the minimum and maximum thresholds and no bond already exists.
"""
mol = Chem.RWMol(rdkit_mol)
if mol.GetNumConformers() == 0:
raise ValueError("No conformers are available for this RDKit molecule.")
else:
conformer = mol.GetConformer()
# Calculate distances and add bonds
for i in range(mol.GetNumAtoms()):
symbol_i = mol.GetAtomWithIdx(i).GetSymbol()
pos_i = conformer.GetAtomPosition(i)
for j in range(i + 1, mol.GetNumAtoms()):
symbol_j = mol.GetAtomWithIdx(j).GetSymbol()
radius_i = _get_covalent_radius(symbol_i)
radius_j = _get_covalent_radius(symbol_j)
max_bond_threshold = radius_i + radius_j + max_distance_tolerance
min_bond_threshold = radius_i + radius_j - min_distance_tolerance
if _is_transition_metal(symbol_i) or _is_transition_metal(symbol_j):
max_bond_threshold *= 1.3 # Increase threshold for metal complexes
if (
pos_i.Distance(conformer.GetAtomPosition(j)) > min_bond_threshold
and pos_i.Distance(conformer.GetAtomPosition(j)) < max_bond_threshold
and mol.GetBondBetweenAtoms(i, j) is None
):
mol.AddBond(i, j, Chem.BondType.SINGLE)
return mol.GetMol()
def _determine_connectivity_hybrid(rdkit_mol, distance_tolerance=0.1):
"""Use both RDKit and OpenBabel for best results."""
mol_rdkit = _determine_connectivity_rdkit(rdkit_mol, distance_tolerance)
try:
mol_rdkit = _determine_connectivity_rdkit(rdkit_mol, distance_tolerance)
if mol_rdkit is not None and mol_rdkit.GetNumBonds() > 0:
return mol_rdkit
except Exception:
pass
# Fall back to Openbabel
return _determine_connectivity_openbabel(rdkit_mol)
#############################################################################
########################### Bond Order Functions ############################
#############################################################################
[docs]
def determine_bonds_mda(mol, verbose=False):
"""Determine bond orders with MDAnalysis, or None if failed.
Parameters
----------
mol : rdkit.Chem.Mol
RDKit molecule that needs to be updated
verbose : bool, optional, default=False
If True and an error in bond determination occurs, the primary traceback is printed.
Returns
-------
mol : rdkit.Chem.Mol
New RDKit molecule from mdanalysis determination of bond orders. Note that atom
properties may have been lost. Returns None if bond determination failed.
"""
mol = copy.deepcopy(mol)
mol.UpdatePropertyCache(strict=False)
if any(atm.GetNumImplicitHs() > 0 for atm in mol.GetAtoms()):
raise ValueError("Provided molecule has implicit hydrogen atoms.")
totalcharge, _, charged_atoms_before = assess_atoms(mol)
rings = find_molecular_rings(mol_to_graph(mol), min_ring_size=6, max_ring_size=6)
# Find 6-member rings and set aromatic
aromatic_bonds = [
sorted(ring[x - 1 : x + 1]) for ring in rings for x in range(1, len(ring))
]
aromatic_bonds += [sorted([ring[-1], ring[0]]) for ring in rings if len(ring) > 2]
for bond in mol.GetBonds():
atm1, atm2 = bond.GetBeginAtom(), bond.GetEndAtom()
pair = sorted([atm1.GetIdx(), atm2.GetIdx()])
if (
pair in aromatic_bonds
and atm1.GetIdx() in charged_atoms_before
and atm2.GetIdx() in charged_atoms_before
):
bond.SetBondType(bond_type_dict[1.5])
try:
mol = Chem.RWMol(mol)
MolBondInferrer = MDAnalysisInferrer(max_iter=2000)
mol = MolBondInferrer(mol)
_, _, charged_atoms_after = assess_atoms(mol)
if (
len(DeepDiff(charged_atoms_before, charged_atoms_after)) == 0
and totalcharge != 0
):
warnings.warn("MDAnalysis failed to determine molecular bond orders.")
mol = None
except Exception:
if verbose:
warnings.warn(first_traceback())
warnings.warn("MDAnalysis failed to determine molecular bond orders.")
mol = None
return mol
[docs]
def determine_bonds_rdkit(mol, charge=0, verbose=False):
"""Determine bond orders with RDKit, or None if failed.
Parameters
----------
mol : rdkit.Chem.Mol
RDKit molecule that needs to be updated
charge : int
Set the charge of the molecule when determining the bond orders
verbose : bool, optional, default=False
If True and an error in bond determination occurs, the primary traceback is printed.
Returns
-------
mol : rdkit.Chem.Mol
New RDKit molecule from rdkit determination of bond orders. Note that atom
properties may have been lost. Returns None if bond determination failed.
"""
mol = copy.deepcopy(mol)
mol.UpdatePropertyCache(strict=False)
if any(atm.GetNumImplicitHs() > 0 for atm in mol.GetAtoms()):
raise ValueError("Provided molecule has implicit hydrogen atoms.")
mol = Chem.RWMol(mol)
try:
DetermineBondOrders(
mol, charge=charge, maxIterations=1000, allowChargedFragments=False
)
except Exception:
if verbose:
warnings.warn(first_traceback())
warnings.warn("RDKit failed to determine molecular bond orders.")
mol = None
return mol
[docs]
def determine_bonds_openbabel(mol, return_implicit_Hs=False, verbose=False):
"""Determine bond orders with Open Babel, or None if failed.
Note that atom properties may have been lost and the default from openbabel is to
process the molecule with implicit hydrogens only, so all hydrogens and their positions
are deleted and then optionally restores with approximate coordinates.
Parameters
----------
mol : rdkit.Chem.Mol
RDKit molecule that needs to be updated
return_implicit_Hs : bool, optional, default=False
If False, the inherent implicit hydrogens from the openbabel process are added back with
approximated coordinates.
verbose : bool, optional, default=False
If True and an error in bond determination occurs, the primary traceback is printed.
Returns
-------
mol : rdkit.Chem.Mol
New RDKit molecule from openbabel determination of bond orders. Note that atom
properties may have been lost. Returns None if bond determination failed.
"""
mol = copy.deepcopy(mol)
mol.UpdatePropertyCache(strict=False)
if any(atm.GetNumImplicitHs() > 0 for atm in mol.GetAtoms()):
raise ValueError("Provided molecule has implicit hydrogen atoms.")
mol = Chem.RWMol(mol)
mol.UpdatePropertyCache(strict=False)
try:
mol_no_H = Chem.RemoveHs(mol) # PerceiveBondOrders requires implicit Hs
except Chem.rdchem.AtomValenceException:
if verbose:
warnings.warn(first_traceback())
warnings.warn(
"RDKit sanitize failed to remove hydrogens in preparing for OpenBabel."
)
mol_openbabel = None
else:
obConversion = ob.OBConversion()
obConversion.SetInAndOutFormats("mol", "mol")
obMol = ob.OBMol()
obConversion.ReadString(obMol, Chem.MolToMolBlock(mol_no_H, forceV3000=True))
obMol.PerceiveBondOrders()
aromatyper = ob.OBAromaticTyper()
aromatyper.AssignAromaticFlags(obMol)
obMol.PerceiveBondOrders()
mol_block_processed = obConversion.WriteString(obMol)
# sanitize=False needed for 5 member rings in porphyrins
mol_openbabel = Chem.MolFromMolBlock(mol_block_processed, sanitize=False)
if mol_openbabel is None:
warnings.warn("Openbabel failed to determine molecular bond orders.")
else:
if not return_implicit_Hs:
mol_openbabel = Chem.AddHs(
mol_openbabel, addCoords=True, explicitOnly=False
)
mol_openbabel.UpdatePropertyCache(strict=False)
return mol_openbabel
[docs]
def update_atom_bond_props(mol_to_change, mol_reference, sanitize=True):
"""Update atom and bond properties of one molecule to match another.
Molecules must either be identical in their atomic composition and connectivity, or the reference may be
the same as the other but with implicit hydrogens, in which case it's assumed that the indices are simply
shifted.
Atom properties include the formal charge, whether the atom is aromatic, and setting NoImplicit = True.
Bond properties include the bond order.
Parameters
----------
mol_to_change : rdkit.Chem.Mol
RDKit molecule that needs to be updated
mol_reference : rdkit.Chem.Mol
RDKit molecule for reference to update the target molecule
sanitize : bool, optional, default=True
Sanitize the resulting molecule
Raises
------
ValueError
If the chemical formulas of the provided molecules indicate that they cannot be mapped.
"""
formula_to_change = get_molecular_formula(mol_to_change)
formula_to_change_no_H = get_molecular_formula(
mol_to_change, make_hydrogens_implicit=True
)
formula_reference = get_molecular_formula(mol_reference)
if formula_to_change == formula_reference:
atom_mapping = find_atom_mapping(mol_to_change, mol_reference)
elif formula_to_change_no_H == formula_reference:
atom_mapping = implicit_hydrogen_atom_mapping(mol_to_change)
else:
raise ValueError("The provided molecules cannot be mapped.")
# Correct formal charges
for orig_idx, correct_idx in atom_mapping.items():
orig_atom = mol_to_change.GetAtomWithIdx(orig_idx)
correct_atom = mol_reference.GetAtomWithIdx(correct_idx)
orig_atom.SetFormalCharge(correct_atom.GetFormalCharge())
orig_atom.SetNoImplicit(True)
orig_atom.SetIsAromatic(correct_atom.GetIsAromatic())
# Correct bonds
reverse_mapping = {v: k for k, v in atom_mapping.items()}
target_bonds = {}
for bond in mol_reference.GetBonds():
bond_key = tuple(
sorted(
[
reverse_mapping[bond.GetBeginAtomIdx()],
reverse_mapping[bond.GetEndAtomIdx()],
]
)
)
target_bonds[bond_key] = bond.GetBondType()
# Update existing bonds to correct types
for bond in mol_to_change.GetBonds():
bond_key = tuple(sorted([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()]))
if bond_key in target_bonds:
target_bond_type = target_bonds[bond_key]
if bond.GetBondType() != target_bond_type:
bond.SetBondType(target_bond_type)
if sanitize:
try:
Chem.SanitizeMol(
mol_to_change,
sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL
^ Chem.SanitizeFlags.SANITIZE_SETAROMATICITY,
)
except Chem.rdchem.AtomValenceException:
warnings.warn("RDKit sanitize failed, passing molecule as is.")
return mol_to_change
[docs]
def add_obvious_bonds(mol):
"""Correct bond order for adjacent atoms with hanging bonds.
Sometimes after the determining bond order analysis (particularly for openbabel)
there are adjacent atoms, each with a hanging bond. The success of this package
is improved significantly by increasing those bond orders by one.
Parameters
----------
mol : rdkit.Chem.Mol
RDKit molecule to update
"""
mol.UpdatePropertyCache(strict=False)
_, _, charged_atoms = assess_atoms(mol)
if charged_atoms:
atoms_hanging_bonds = [index for index in charged_atoms.keys()]
pairs = list(itertools.combinations(atoms_hanging_bonds, 2))
for idx1, idx2 in pairs:
bond = mol.GetBondBetweenAtoms(idx1, idx2)
if bond is not None:
bo = bond_order_dict[bond.GetBondType().name]
atom1 = mol.GetAtomWithIdx(idx1)
chg1 = get_atom_charge(atom1)
atom2 = mol.GetAtomWithIdx(idx2)
chg2 = get_atom_charge(atom2)
if bo == 1.5 or chg1 != chg2:
continue
if bo - chg1 not in bond_type_dict:
raise ValueError(
f"Bond order change cannot be rectified. Orig Bond Order: {bo}, Atom Charge {chg1}"
)
bond.SetBondType(bond_type_dict[bo - chg1])
atom1.SetFormalCharge(0)
atom2.SetFormalCharge(0)
mol.UpdatePropertyCache(strict=False)