Python rdkit.Chem.RemoveHs() Examples
The following are 25
code examples of rdkit.Chem.RemoveHs().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
rdkit.Chem
, or try the search function
.
Example #1
Source File: molecule.py From chemml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _extra_docs(self): # method: to_smiles self._to_smiles_core_names = ("rdkit.Chem.MolToSmarts",) self._to_smiles_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.inchi.html?highlight=inchi#rdkit.Chem.inchi.MolToInchi",) # method: to_smarts self._to_smarts_core_names = ("rdkit.Chem.MolToSmarts",) self._to_smarts_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.inchi.html?highlight=inchi#rdkit.Chem.inchi.MolToInchi",) # method: to_inchi self._to_inchi_core_names = ("rdkit.Chem.MolToInchi",) self._to_inchi_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.inchi.html?highlight=inchi#rdkit.Chem.inchi.MolToInchi",) # method: hydrogens self._hydrogens_core_names = ("rdkit.Chem.AddHs","rdkit.Chem.RemoveHs") self._hydrogens_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.AddHs", "http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.RemoveHs") # self._to_xyz_core_names = ("rdkit.Chem.AllChem.MMFFOptimizeMolecule","rdkit.Chem.AllChem.UFFOptimizeMolecule") self._to_xyz_core_docs =( "http://rdkit.org/docs/source/rdkit.Chem.rdForceFieldHelpers.html?highlight=mmff#rdkit.Chem.rdForceFieldHelpers.MMFFOptimizeMolecule", "http://rdkit.org/docs/source/rdkit.Chem.rdForceFieldHelpers.html?highlight=mmff#rdkit.Chem.rdForceFieldHelpers.UFFOptimizeMolecule" )
Example #2
Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 6 votes |
def standardize(self, mol): """Return a standardized version the given molecule. The standardization process consists of the following stages: RDKit :rdkit:`RemoveHs <Chem.rdmolops-module.html#RemoveHs>`, RDKit :rdkit:`SanitizeMol <Chem.rdmolops-module.html#SanitizeMol>`, :class:`~molvs.metal.MetalDisconnector`, :class:`~molvs.normalize.Normalizer`, :class:`~molvs.charge.Reionizer`, RDKit :rdkit:`AssignStereochemistry <Chem.rdmolops-module.html#AssignStereochemistry>`. :param mol: The molecule to standardize. :type mol: :rdkit:`Mol <Chem.rdchem.Mol-class.html>` :returns: The standardized molecule. :rtype: :rdkit:`Mol <Chem.rdchem.Mol-class.html>` """ mol = copy.deepcopy(mol) Chem.RemoveHs(mol) Chem.SanitizeMol(mol) mol = self.disconnect_metals(mol) mol = self.normalize(mol) mol = self.reionize(mol) Chem.AssignStereochemistry(mol, force=True, cleanIt=True) # TODO: Check this removes symmetric stereocenters return mol
Example #3
Source File: standardize.py From MolVS with MIT License | 6 votes |
def standardize(self, mol): """Return a standardized version the given molecule. The standardization process consists of the following stages: RDKit :py:func:`~rdkit.Chem.rdmolops.RemoveHs`, RDKit :py:func:`~rdkit.Chem.rdmolops.SanitizeMol`, :class:`~molvs.metal.MetalDisconnector`, :class:`~molvs.normalize.Normalizer`, :class:`~molvs.charge.Reionizer`, RDKit :py:func:`~rdkit.Chem.rdmolops.AssignStereochemistry`. :param mol: The molecule to standardize. :type mol: rdkit.Chem.rdchem.Mol :returns: The standardized molecule. :rtype: rdkit.Chem.rdchem.Mol """ mol = copy.deepcopy(mol) Chem.SanitizeMol(mol) mol = Chem.RemoveHs(mol) mol = self.disconnect_metals(mol) mol = self.normalize(mol) mol = self.reionize(mol) Chem.AssignStereochemistry(mol, force=True, cleanIt=True) # TODO: Check this removes symmetric stereocenters return mol
Example #4
Source File: test.py From xyz2mol with MIT License | 5 votes |
def test_smiles_from_coord_huckel(smiles): # The answer mol = Chem.MolFromSmiles(smiles) charge = Chem.GetFormalCharge(mol) canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False) # generate forcefield coordinates atoms, coordinates = generate_structure_from_smiles(smiles) # Generate molobj from atoms, charge and coordinates mol = x2m.xyz2mol(atoms, coordinates, charge=charge, use_huckel=True) # For this test, remove chira. clean and canonical Chem.Kekulize(mol) mol = Chem.RemoveHs(mol) Chem.RemoveStereochemistry(mol) smiles = Chem.MolToSmiles(mol, isomericSmiles=False) # Please look away. A small hack that removes the explicit hydrogens mol = Chem.MolFromSmiles(smiles) smiles = Chem.MolToSmiles(mol) assert smiles == canonical_smiles return
Example #5
Source File: data_utils.py From MAT with MIT License | 5 votes |
def load_data_from_smiles(x_smiles, labels, add_dummy_node=True, one_hot_formal_charge=False): """Load and featurize data from lists of SMILES strings and labels. Args: x_smiles (list[str]): A list of SMILES strings. labels (list[float]): A list of the corresponding labels. add_dummy_node (bool): If True, a dummy node will be added to the molecular graph. Defaults to True. one_hot_formal_charge (bool): If True, formal charges on atoms are one-hot encoded. Defaults to False. Returns: A tuple (X, y) in which X is a list of graph descriptors (node features, adjacency matrices, distance matrices), and y is a list of the corresponding labels. """ x_all, y_all = [], [] for smiles, label in zip(x_smiles, labels): try: mol = MolFromSmiles(smiles) try: mol = Chem.AddHs(mol) AllChem.EmbedMolecule(mol, maxAttempts=5000) AllChem.UFFOptimizeMolecule(mol) mol = Chem.RemoveHs(mol) except: AllChem.Compute2DCoords(mol) afm, adj, dist = featurize_mol(mol, add_dummy_node, one_hot_formal_charge) x_all.append([afm, adj, dist]) y_all.append([label]) except ValueError as e: logging.warning('the SMILES ({}) can not be converted to a graph.\nREASON: {}'.format(smiles, e)) return x_all, y_all
Example #6
Source File: molecule.py From chemml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def hydrogens(self, action='add', **kwargs): """ This function adds/removes hydrogens to/from a prebuilt molecule object. Parameters ---------- action: str Either 'add' or 'remove', to add hydrogns or remove them from the rdkit molecule. kwargs: The arguments that can be passed to the rdkit functions: - `Chem.AddHs`: documentation at http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.AddHs - `Chem.RemoveHs`: documentation at http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.RemoveHs Notes ----- - The rdkit or pybel molecule object must be created in advance. - Only rdkit or pybel molecule object will be modified in place. - If you remove hydrogens from molecules, the atomic 3D coordinates might not be accurate for the conversion to xyz representation. """ # molecule must exist _ = self._check_original_molecule() if action == 'add': if self.pybel_molecule: self.pybel_molecule.addh() # Note: just if not elif if self.rdkit_molecule: self.rdkit_molecule = Chem.AddHs(self.rdkit_molecule, **kwargs) elif action == 'remove': if self.pybel_molecule: self.pybel_molecule.removeh() if self.rdkit_molecule: self.rdkit_molecule = Chem.RemoveHs(self.rdkit_molecule, **kwargs) else: raise ValueError("The parameter 'action' must be either of 'add' or 'remove'.")
Example #7
Source File: rdk.py From oddt with BSD 3-Clause "New" or "Revised" License | 5 votes |
def removeh(self, **kwargs): """Remove hydrogens.""" self.Mol = Chem.RemoveHs(self.Mol, **kwargs) self._clear_cache()
Example #8
Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def rmhs(self, mol): from rdkit.Chem import RemoveHs return RemoveHs(mol)
Example #9
Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def rmhs(self, mol): from rdkit.Chem import RemoveHs return RemoveHs(mol)
Example #10
Source File: estate.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _CalculateEState(mol, skipH=1): """ ################################################################# **Internal used only** Get the EState value of each atom in a molecule ################################################################# """ mol = Chem.AddHs(mol) if skipH == 1: mol = Chem.RemoveHs(mol) tb1 = Chem.GetPeriodicTable() nAtoms = mol.GetNumAtoms() Is = numpy.zeros(nAtoms, numpy.float) for i in range(nAtoms): at = mol.GetAtomWithIdx(i) atNum = at.GetAtomicNum() d = at.GetDegree() if d > 0: h = at.GetTotalNumHs() dv = tb1.GetNOuterElecs(atNum) - h # dv=numpy.array(_AtomHKDeltas(at),'d') N = _GetPrincipleQuantumNumber(atNum) Is[i] = (4.0 / (N * N) * dv + 1) / d dists = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0) dists += 1 accum = numpy.zeros(nAtoms, numpy.float) for i in range(nAtoms): for j in range(i + 1, nAtoms): p = dists[i, j] if p < 1e6: temp = (Is[i] - Is[j]) / (p * p) accum[i] += temp accum[j] -= temp res = accum + Is return res
Example #11
Source File: cats2d.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def ContructLFromGraphSearch(mol): """ ################################################################# The last lipophilic pattern on page 55 of the book is realized as a graph search and not as a SMARTS search. "L" carbon atom adjacent only to carbon atoms. The result is a list format. ################################################################# """ AtomIndex = [] Hmol = Chem.RemoveHs(mol) for atom in Hmol.GetAtoms(): temp = [] if atom.GetAtomicNum() == 6: for neighatom in atom.GetNeighbors(): if neighatom.GetAtomicNum() == 6: temp.append(0) elif neighatom.GetAtomicNum() == 1: continue else: temp.append(1) if sum(temp) == 0: AtomIndex.append(atom.GetIdx()) return AtomIndex ###################################
Example #12
Source File: test.py From xyz2mol with MIT License | 5 votes |
def test_smiles_from_xyz_files(filename, charge, answer): charged_fragments = True quick = True atoms, charge_read, coordinates = x2m.read_xyz_file(filename) mol = x2m.xyz2mol(atoms, coordinates, charge=charge) mol = Chem.RemoveHs(mol) smiles = Chem.MolToSmiles(mol) assert smiles == answer return
Example #13
Source File: test.py From xyz2mol with MIT License | 5 votes |
def test_smiles_from_coord_vdw(smiles): # The answer mol = Chem.MolFromSmiles(smiles) charge = Chem.GetFormalCharge(mol) canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False) # generate forcefield coordinates atoms, coordinates = generate_structure_from_smiles(smiles) # Generate molobj from atoms, charge and coordinates mol = x2m.xyz2mol(atoms, coordinates, charge=charge) # For this test, remove chira. clean and canonical Chem.Kekulize(mol) mol = Chem.RemoveHs(mol) Chem.RemoveStereochemistry(mol) smiles = Chem.MolToSmiles(mol, isomericSmiles=False) # Please look away. A small hack that removes the explicit hydrogens mol = Chem.MolFromSmiles(smiles) smiles = Chem.MolToSmiles(mol) assert smiles == canonical_smiles return
Example #14
Source File: test.py From xyz2mol with MIT License | 5 votes |
def test_smiles_from_adjacent_matrix(smiles): charged_fragments = True quick = True # Cut apart the smiles mol = get_mol(smiles) atoms = get_atoms(mol) charge = Chem.GetFormalCharge(mol) adjacent_matrix = Chem.GetAdjacencyMatrix(mol) # mol = Chem.RemoveHs(mol) canonical_smiles = Chem.MolToSmiles(mol) # Define new molecule template from atoms new_mol = x2m.get_proto_mol(atoms) # reconstruct the molecule from adjacent matrix, atoms and total charge new_mol = x2m.AC2mol(new_mol, adjacent_matrix, atoms, charge, charged_fragments, quick) new_mol = Chem.RemoveHs(new_mol) new_mol_smiles = Chem.MolToSmiles(new_mol) assert new_mol_smiles == canonical_smiles return
Example #15
Source File: PredX_MPNN.py From dl4chem-geometry with BSD 3-Clause "New" or "Revised" License | 5 votes |
def getRMS(self, prb_mol, ref_pos, useFF=False): def optimizeWithFF(mol): molf = Chem.AddHs(mol, addCoords=True) AllChem.MMFFOptimizeMolecule(molf) molf = Chem.RemoveHs(molf) return molf n_est = prb_mol.GetNumAtoms() ref_cf = Chem.rdchem.Conformer(n_est) for k in range(n_est): ref_cf.SetAtomPosition(k, ref_pos[k].tolist()) ref_mol = copy.deepcopy(prb_mol) ref_mol.RemoveConformer(0) ref_mol.AddConformer(ref_cf) if useFF: try: res = AllChem.AlignMol(prb_mol, optimizeWithFF(ref_mol)) except: res = AllChem.AlignMol(prb_mol, ref_mol) else: res = AllChem.AlignMol(prb_mol, ref_mol) return res
Example #16
Source File: fsapt_helper.py From psikit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_feat_data(mol, offset=1): res = [] check_atom = set() nohmol = Chem.RemoveHs(mol) recap_res = Recap.RecapDecompose(nohmol) leaves = [key.replace('*','').replace('()','') for key in recap_res.GetLeaves().keys()] leaves = [leave.replace('[H]', '') for leave in leaves if leave != '[H]'] leaves = sorted(leaves, key=lambda x: Chem.MolFromSmarts(x).GetNumAtoms(), reverse=True) if len(leaves) == 0: line = [i for i in range(mol.GetNumAtoms())] line = [str(n + offset) for n in line] line = [Chem.MolToSmiles(mol)] + line return [line] for leavsmi in leaves: leav = Chem.MolFromSmarts(leavsmi) matches = mol.GetSubstructMatches(leav) for i, match in enumerate(matches): line = list(match) if len(check_atom & set(line)) > 0: continue check_atom = check_atom|set(line) for idx in match: nei = get_neighbor_h(idx, mol) line += nei line = [str(j + offset) for j in line] line = [leavsmi + '_' + str(i)] + line res.append(line) return res
Example #17
Source File: evaluate.py From GLN with MIT License | 5 votes |
def canonicalize(smiles): try: tmp = Chem.MolFromSmiles(smiles) except: print('no mol') return smiles if tmp is None: return smiles tmp = Chem.RemoveHs(tmp) [a.ClearProp('molAtomMapNumber') for a in tmp.GetAtoms()] return Chem.MolToSmiles(tmp)
Example #18
Source File: mol_utils.py From GLN with MIT License | 5 votes |
def cano_smiles(smiles): try: tmp = Chem.MolFromSmiles(smiles) if tmp is None: return None, smiles tmp = Chem.RemoveHs(tmp) if tmp is None: return None, smiles [a.ClearProp('molAtomMapNumber') for a in tmp.GetAtoms()] return tmp, Chem.MolToSmiles(tmp) except: return None, smiles
Example #19
Source File: test_coulomb_matrices.py From deepchem with MIT License | 5 votes |
def test_coulomb_matrix_no_hydrogens(self): """ Test hydrogen removal. """ from rdkit import Chem mol = Chem.RemoveHs(self.mol) assert mol.GetNumAtoms() < self.mol.GetNumAtoms() f = cm.CoulombMatrix( max_atoms=mol.GetNumAtoms(), remove_hydrogens=True, upper_tri=True) rval = f([self.mol]) # use the version with hydrogens size = np.triu_indices(mol.GetNumAtoms())[0].size assert rval.shape == (1, mol.GetNumConformers(), size)
Example #20
Source File: coulomb_matrices.py From deepchem with MIT License | 5 votes |
def coulomb_matrix(self, mol): """ Generate Coulomb matrices for each conformer of the given molecule. Parameters ---------- mol : RDKit Mol Molecule. """ from rdkit import Chem if self.remove_hydrogens: mol = Chem.RemoveHs(mol) n_atoms = mol.GetNumAtoms() z = [atom.GetAtomicNum() for atom in mol.GetAtoms()] rval = [] for conf in mol.GetConformers(): d = self.get_interatomic_distances(conf) m = np.zeros((n_atoms, n_atoms)) for i in range(mol.GetNumAtoms()): for j in range(mol.GetNumAtoms()): if i == j: m[i, j] = 0.5 * z[i]**2.4 elif i < j: m[i, j] = (z[i] * z[j]) / d[i, j] m[j, i] = m[i, j] else: continue if self.randomize: for random_m in self.randomize_coulomb_matrix(m): random_m = pad_array(random_m, self.max_atoms) rval.append(random_m) else: m = pad_array(m, self.max_atoms) rval.append(m) rval = np.asarray(rval) return rval
Example #21
Source File: dataset.py From 3DGCN with MIT License | 5 votes |
def tensorize(self, batch_x, batch_c): atom_tensor = np.zeros((len(batch_x), self.num_atoms, self.get_num_features())) adjm_tensor = np.zeros((len(batch_x), self.num_atoms, self.num_atoms)) posn_tensor = np.zeros((len(batch_x), self.num_atoms, self.num_atoms, 3)) for mol_idx, mol in enumerate(batch_x): Chem.RemoveHs(mol) mol_atoms = mol.GetNumAtoms() # Atom features atom_tensor[mol_idx, :mol_atoms, :] = self.get_atom_features(mol) # Adjacency matrix adjms = np.array(rdmolops.GetAdjacencyMatrix(mol), dtype="float") # Normalize adjacency matrix by D^(-1/2) * A_hat * D^(-1/2), Kipf et al. 2016 adjms += np.eye(mol_atoms) degree = np.array(adjms.sum(1)) deg_inv_sqrt = np.power(degree, -0.5) deg_inv_sqrt[np.isinf(deg_inv_sqrt)] = 0. deg_inv_sqrt = np.diag(deg_inv_sqrt) adjms = np.matmul(np.matmul(deg_inv_sqrt, adjms), deg_inv_sqrt) adjm_tensor[mol_idx, : mol_atoms, : mol_atoms] = adjms # Relative position matrix for atom_idx in range(mol_atoms): pos_c = batch_c[mol_idx][atom_idx] for neighbor_idx in range(mol_atoms): pos_n = batch_c[mol_idx][neighbor_idx] # Direction should be Neighbor -> Center n_to_c = [pos_c[0] - pos_n[0], pos_c[1] - pos_n[1], pos_c[2] - pos_n[2]] posn_tensor[mol_idx, atom_idx, neighbor_idx, :] = n_to_c return [atom_tensor, adjm_tensor, posn_tensor]
Example #22
Source File: converter.py From 3DGCN with MIT License | 4 votes |
def optimize_conformer(idx, m, prop, algo="MMFF"): print("Calculating {}: {} ...".format(idx, Chem.MolToSmiles(m))) mol = Chem.AddHs(m) if algo == "ETKDG": # Landrum et al. DOI: 10.1021/acs.jcim.5b00654 k = AllChem.EmbedMolecule(mol, AllChem.ETKDG()) if k != 0: return None, None elif algo == "UFF": # Universal Force Field AllChem.EmbedMultipleConfs(mol, 50, pruneRmsThresh=0.5) try: arr = AllChem.UFFOptimizeMoleculeConfs(mol, maxIters=2000) except ValueError: return None, None if not arr: return None, None else: arr = AllChem.UFFOptimizeMoleculeConfs(mol, maxIters=20000) idx = np.argmin(arr, axis=0)[1] conf = mol.GetConformers()[idx] mol.RemoveAllConformers() mol.AddConformer(conf) elif algo == "MMFF": # Merck Molecular Force Field AllChem.EmbedMultipleConfs(mol, 50, pruneRmsThresh=0.5) try: arr = AllChem.MMFFOptimizeMoleculeConfs(mol, maxIters=2000) except ValueError: return None, None if not arr: return None, None else: arr = AllChem.MMFFOptimizeMoleculeConfs(mol, maxIters=20000) idx = np.argmin(arr, axis=0)[1] conf = mol.GetConformers()[idx] mol.RemoveAllConformers() mol.AddConformer(conf) mol = Chem.RemoveHs(mol) return mol, prop
Example #23
Source File: test_rdkitfixer.py From oddt with BSD 3-Clause "New" or "Revised" License | 4 votes |
def test_add_missing_atoms(): # add missing atom at tryptophan molfile = os.path.join(test_dir, '5dhh_missingatomTRP.pdb') mol = Chem.MolFromPDBFile(molfile, sanitize=True) mol = Chem.RemoveHs(mol, sanitize=False) assert mol.GetNumAtoms() == 26 mol = PreparePDBMol(mol, add_missing_atoms=True) assert mol.GetNumAtoms() == 27 atom = mol.GetAtomWithIdx(21) assert atom.GetAtomicNum() == 6 info = atom.GetPDBResidueInfo() assert info.GetResidueName() == 'TRP' assert info.GetResidueNumber() == 175 assert info.GetName().strip() == 'C9' assert atom.IsInRing() assert atom.GetIsAromatic() assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE # add whole ring to tyrosine molfile = os.path.join(test_dir, '3cx9_TYR.pdb') mol = Chem.MolFromPDBFile(molfile, sanitize=True) mol = Chem.RemoveHs(mol, sanitize=False) assert mol.GetNumAtoms() == 23 mol = PreparePDBMol(mol, add_missing_atoms=True) assert mol.GetNumAtoms() == 29 atom = mol.GetAtomWithIdx(17) assert atom.GetAtomicNum() == 6 info = atom.GetPDBResidueInfo() assert info.GetResidueName() == 'TYR' assert info.GetResidueNumber() == 138 assert info.GetName().strip() == 'C6' assert atom.IsInRing() assert atom.GetIsAromatic() assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE # missing protein backbone atoms molfile = os.path.join(test_dir, '5ar7_HIS.pdb') mol = Chem.MolFromPDBFile(molfile, sanitize=False) mol = Chem.RemoveHs(mol, sanitize=False) assert mol.GetNumAtoms() == 21 assert mol.GetNumBonds() == 19 mol = PreparePDBMol(mol, add_missing_atoms=True) assert mol.GetNumAtoms() == 25 assert mol.GetNumBonds() == 25 # missing nucleotide backbone atoms molfile = os.path.join(test_dir, '1bpx_missingBase.pdb') mol = Chem.MolFromPDBFile(molfile, sanitize=False) mol = Chem.RemoveHs(mol, sanitize=False) assert mol.GetNumAtoms() == 301 assert mol.GetNumBonds() == 333 mol = PreparePDBMol(mol, add_missing_atoms=True) assert mol.GetNumAtoms() == 328 assert mol.GetNumBonds() == 366
Example #24
Source File: converter.py From ARC with MIT License | 4 votes |
def to_rdkit_mol(mol, remove_h=False, sanitize=True): """ Convert a molecular structure to an RDKit RDMol object. Uses `RDKit <http://rdkit.org/>`_ to perform the conversion. Perceives aromaticity. Adopted from rmgpy/molecule/converter.py Args: mol (Molecule): An RMG Molecule object for the conversion. remove_h (bool, optional): Whether to remove hydrogen atoms from the molecule, ``True`` to remove. sanitize (bool, optional): Whether to sanitize the RDKit molecule, ``True`` to sanitize. Returns: RDMol: An RDKit molecule object corresponding to the input RMG Molecule object. """ atom_id_map = dict() # only manipulate a copy of ``mol`` mol_copy = mol.copy(deep=True) if not mol_copy.atom_ids_valid(): mol_copy.assign_atom_ids() for i, atom in enumerate(mol_copy.atoms): atom_id_map[atom.id] = i # keeps the original atom order before sorting # mol_copy.sort_atoms() # Sort the atoms before converting to ensure output is consistent between different runs atoms_copy = mol_copy.vertices rd_mol = Chem.rdchem.EditableMol(Chem.rdchem.Mol()) for rmg_atom in atoms_copy: rd_atom = Chem.rdchem.Atom(rmg_atom.element.symbol) if rmg_atom.element.isotope != -1: rd_atom.SetIsotope(rmg_atom.element.isotope) rd_atom.SetNumRadicalElectrons(rmg_atom.radical_electrons) rd_atom.SetFormalCharge(rmg_atom.charge) if rmg_atom.element.symbol == 'C' and rmg_atom.lone_pairs == 1 and mol_copy.multiplicity == 1: # hard coding for carbenes rd_atom.SetNumRadicalElectrons(2) if not (remove_h and rmg_atom.symbol == 'H'): rd_mol.AddAtom(rd_atom) rd_bonds = Chem.rdchem.BondType orders = {'S': rd_bonds.SINGLE, 'D': rd_bonds.DOUBLE, 'T': rd_bonds.TRIPLE, 'B': rd_bonds.AROMATIC, 'Q': rd_bonds.QUADRUPLE} # Add the bonds for atom1 in atoms_copy: for atom2, bond12 in atom1.edges.items(): if bond12.is_hydrogen_bond(): continue if atoms_copy.index(atom1) < atoms_copy.index(atom2): rd_mol.AddBond(atom_id_map[atom1.id], atom_id_map[atom2.id], orders[bond12.get_order_str()]) # Make editable mol and rectify the molecule rd_mol = rd_mol.GetMol() if sanitize: Chem.SanitizeMol(rd_mol) if remove_h: rd_mol = Chem.RemoveHs(rd_mol, sanitize=sanitize) return rd_mol
Example #25
Source File: chemistry.py From guacamol with MIT License | 4 votes |
def filter_and_canonicalize(smiles: str, holdout_set, holdout_fps, neutralization_rxns, tanimoto_cutoff=0.5, include_stereocenters=False): """ Args: smiles: the molecule to process holdout_set: smiles of the holdout set holdout_fps: ECFP4 fingerprints of the holdout set neutralization_rxns: neutralization rdkit reactions tanimoto_cutoff: Remove molecules with a higher ECFP4 tanimoto similarity than this cutoff from the set include_stereocenters: whether to keep stereocenters during canonicalization Returns: list with canonical smiles as a list with one element, or a an empty list. This is to perform a flatmap: """ try: # Drop out if too long if len(smiles) > 200: return [] mol = Chem.MolFromSmiles(smiles) # Drop out if invalid if mol is None: return [] mol = Chem.RemoveHs(mol) # We only accept molecules consisting of H, B, C, N, O, F, Si, P, S, Cl, aliphatic Se, Br, I. metal_smarts = Chem.MolFromSmarts('[!#1!#5!#6!#7!#8!#9!#14!#15!#16!#17!#34!#35!#53]') has_metal = mol.HasSubstructMatch(metal_smarts) # Exclude molecules containing the forbidden elements. if has_metal: print(f'metal {smiles}') return [] canon_smi = Chem.MolToSmiles(mol, isomericSmiles=include_stereocenters) # Drop out if too long canonicalized: if len(canon_smi) > 100: return [] # Balance charges if unbalanced if canon_smi.count('+') - canon_smi.count('-') != 0: new_mol, changed = neutralise_charges(mol, reactions=neutralization_rxns) if changed: mol = new_mol canon_smi = Chem.MolToSmiles(mol, isomericSmiles=include_stereocenters) # Get most similar to holdout fingerprints, and exclude too similar molecules. max_tanimoto = highest_tanimoto_precalc_fps(mol, holdout_fps) if max_tanimoto < tanimoto_cutoff and canon_smi not in holdout_set: return [canon_smi] else: print("Exclude: {} {}".format(canon_smi, max_tanimoto)) except Exception as e: print(e) return []