Python Examples of rdkit.Chem.RemoveHs

Source File: molecule.py From chemml with BSD 3-Clause "New" or "Revised" License

6 votes

def _extra_docs(self):
        # method: to_smiles
        self._to_smiles_core_names = ("rdkit.Chem.MolToSmarts",)
        self._to_smiles_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.inchi.html?highlight=inchi#rdkit.Chem.inchi.MolToInchi",)
        # method: to_smarts
        self._to_smarts_core_names = ("rdkit.Chem.MolToSmarts",)
        self._to_smarts_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.inchi.html?highlight=inchi#rdkit.Chem.inchi.MolToInchi",)
        # method: to_inchi
        self._to_inchi_core_names = ("rdkit.Chem.MolToInchi",)
        self._to_inchi_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.inchi.html?highlight=inchi#rdkit.Chem.inchi.MolToInchi",)
        # method: hydrogens
        self._hydrogens_core_names = ("rdkit.Chem.AddHs","rdkit.Chem.RemoveHs")
        self._hydrogens_core_docs = ("http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.AddHs",
                                    "http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.RemoveHs")
        #
        self._to_xyz_core_names = ("rdkit.Chem.AllChem.MMFFOptimizeMolecule","rdkit.Chem.AllChem.UFFOptimizeMolecule")
        self._to_xyz_core_docs =(
            "http://rdkit.org/docs/source/rdkit.Chem.rdForceFieldHelpers.html?highlight=mmff#rdkit.Chem.rdForceFieldHelpers.MMFFOptimizeMolecule",
            "http://rdkit.org/docs/source/rdkit.Chem.rdForceFieldHelpers.html?highlight=mmff#rdkit.Chem.rdForceFieldHelpers.UFFOptimizeMolecule"
        )

Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

6 votes

def standardize(self, mol):
        """Return a standardized version the given molecule.

        The standardization process consists of the following stages: RDKit
        :rdkit:`RemoveHs <Chem.rdmolops-module.html#RemoveHs>`, RDKit
        :rdkit:`SanitizeMol <Chem.rdmolops-module.html#SanitizeMol>`, :class:`~molvs.metal.MetalDisconnector`,
        :class:`~molvs.normalize.Normalizer`, :class:`~molvs.charge.Reionizer`, RDKit
        :rdkit:`AssignStereochemistry <Chem.rdmolops-module.html#AssignStereochemistry>`.

        :param mol: The molecule to standardize.
        :type mol: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
        :returns: The standardized molecule.
        :rtype: :rdkit:`Mol <Chem.rdchem.Mol-class.html>`
        """
        mol = copy.deepcopy(mol)
        Chem.RemoveHs(mol)
        Chem.SanitizeMol(mol)
        mol = self.disconnect_metals(mol)
        mol = self.normalize(mol)
        mol = self.reionize(mol)
        Chem.AssignStereochemistry(mol, force=True, cleanIt=True)
        # TODO: Check this removes symmetric stereocenters
        return mol

Source File: standardize.py From MolVS with MIT License

6 votes

def standardize(self, mol):
        """Return a standardized version the given molecule.

        The standardization process consists of the following stages: RDKit
        :py:func:`~rdkit.Chem.rdmolops.RemoveHs`, RDKit :py:func:`~rdkit.Chem.rdmolops.SanitizeMol`,
        :class:`~molvs.metal.MetalDisconnector`, :class:`~molvs.normalize.Normalizer`,
        :class:`~molvs.charge.Reionizer`, RDKit :py:func:`~rdkit.Chem.rdmolops.AssignStereochemistry`.

        :param mol: The molecule to standardize.
        :type mol: rdkit.Chem.rdchem.Mol
        :returns: The standardized molecule.
        :rtype: rdkit.Chem.rdchem.Mol
        """
        mol = copy.deepcopy(mol)
        Chem.SanitizeMol(mol)
        mol = Chem.RemoveHs(mol)
        mol = self.disconnect_metals(mol)
        mol = self.normalize(mol)
        mol = self.reionize(mol)
        Chem.AssignStereochemistry(mol, force=True, cleanIt=True)
        # TODO: Check this removes symmetric stereocenters
        return mol

Source File: test.py From xyz2mol with MIT License

5 votes

def test_smiles_from_coord_huckel(smiles):

    # The answer
    mol = Chem.MolFromSmiles(smiles)
    charge = Chem.GetFormalCharge(mol)
    canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False)

    # generate forcefield coordinates
    atoms, coordinates = generate_structure_from_smiles(smiles)

    # Generate molobj from atoms, charge and coordinates
    mol = x2m.xyz2mol(atoms, coordinates, charge=charge, use_huckel=True)

    # For this test, remove chira. clean and canonical
    Chem.Kekulize(mol)
    mol = Chem.RemoveHs(mol)
    Chem.RemoveStereochemistry(mol)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=False)

    # Please look away. A small hack that removes the explicit hydrogens
    mol = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(mol)

    assert smiles == canonical_smiles

    return

Source File: data_utils.py From MAT with MIT License

5 votes

def load_data_from_smiles(x_smiles, labels, add_dummy_node=True, one_hot_formal_charge=False):
    """Load and featurize data from lists of SMILES strings and labels.

    Args:
        x_smiles (list[str]): A list of SMILES strings.
        labels (list[float]): A list of the corresponding labels.
        add_dummy_node (bool): If True, a dummy node will be added to the molecular graph. Defaults to True.
        one_hot_formal_charge (bool): If True, formal charges on atoms are one-hot encoded. Defaults to False.

    Returns:
        A tuple (X, y) in which X is a list of graph descriptors (node features, adjacency matrices, distance matrices),
        and y is a list of the corresponding labels.
    """
    x_all, y_all = [], []

    for smiles, label in zip(x_smiles, labels):
        try:
            mol = MolFromSmiles(smiles)
            try:
                mol = Chem.AddHs(mol)
                AllChem.EmbedMolecule(mol, maxAttempts=5000)
                AllChem.UFFOptimizeMolecule(mol)
                mol = Chem.RemoveHs(mol)
            except:
                AllChem.Compute2DCoords(mol)

            afm, adj, dist = featurize_mol(mol, add_dummy_node, one_hot_formal_charge)
            x_all.append([afm, adj, dist])
            y_all.append([label])
        except ValueError as e:
            logging.warning('the SMILES ({}) can not be converted to a graph.\nREASON: {}'.format(smiles, e))

    return x_all, y_all

Source File: molecule.py From chemml with BSD 3-Clause "New" or "Revised" License

5 votes

def hydrogens(self, action='add', **kwargs):
        """
        This function adds/removes hydrogens to/from a prebuilt molecule object.

        Parameters
        ----------
        action: str
            Either 'add' or 'remove', to add hydrogns or remove them from the rdkit molecule.

        kwargs:
            The arguments that can be passed to the rdkit functions:
            - `Chem.AddHs`: documentation at http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.AddHs
            - `Chem.RemoveHs`: documentation at http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html?highlight=addhs#rdkit.Chem.rdmolops.RemoveHs

        Notes
        -----
            - The rdkit or pybel molecule object must be created in advance.
            - Only rdkit or pybel molecule object will be modified in place.
            - If you remove hydrogens from molecules, the atomic 3D coordinates might not be accurate for the conversion to xyz representation.

        """

        # molecule must exist
        _ = self._check_original_molecule()

        if action == 'add':
            if self.pybel_molecule:
                self.pybel_molecule.addh()
            # Note: just if not elif
            if self.rdkit_molecule:
                self.rdkit_molecule = Chem.AddHs(self.rdkit_molecule, **kwargs)
        elif action == 'remove':
            if self.pybel_molecule:
                self.pybel_molecule.removeh()
            if self.rdkit_molecule:
                self.rdkit_molecule = Chem.RemoveHs(self.rdkit_molecule, **kwargs)
        else:
            raise ValueError("The parameter 'action' must be either of 'add' or 'remove'.")

Source File: rdk.py From oddt with BSD 3-Clause "New" or "Revised" License

5 votes

def removeh(self, **kwargs):
        """Remove hydrogens."""
        self.Mol = Chem.RemoveHs(self.Mol, **kwargs)
        self._clear_cache()

Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def rmhs(self, mol):
        from rdkit.Chem import RemoveHs

        return RemoveHs(mol)

Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def rmhs(self, mol):
        from rdkit.Chem import RemoveHs

        return RemoveHs(mol)

Source File: estate.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def _CalculateEState(mol, skipH=1):
    """
    #################################################################
    **Internal used only**

    Get the EState value of each atom in a molecule
    #################################################################
    """
    mol = Chem.AddHs(mol)
    if skipH == 1:
        mol = Chem.RemoveHs(mol)
    tb1 = Chem.GetPeriodicTable()
    nAtoms = mol.GetNumAtoms()
    Is = numpy.zeros(nAtoms, numpy.float)
    for i in range(nAtoms):
        at = mol.GetAtomWithIdx(i)
        atNum = at.GetAtomicNum()
        d = at.GetDegree()
        if d > 0:
            h = at.GetTotalNumHs()
            dv = tb1.GetNOuterElecs(atNum) - h
            # dv=numpy.array(_AtomHKDeltas(at),'d')
            N = _GetPrincipleQuantumNumber(atNum)
            Is[i] = (4.0 / (N * N) * dv + 1) / d
    dists = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0)
    dists += 1
    accum = numpy.zeros(nAtoms, numpy.float)
    for i in range(nAtoms):
        for j in range(i + 1, nAtoms):
            p = dists[i, j]
            if p < 1e6:
                temp = (Is[i] - Is[j]) / (p * p)
                accum[i] += temp
                accum[j] -= temp
    res = accum + Is
    return res

Source File: cats2d.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def ContructLFromGraphSearch(mol):
    """
    #################################################################
    The last lipophilic pattern on page 55 of the book is realized as a graph
    search and not as a SMARTS search.

    "L" carbon atom adjacent only to carbon atoms.

    The result is a list format.
    #################################################################
    """

    AtomIndex = []
    Hmol = Chem.RemoveHs(mol)
    for atom in Hmol.GetAtoms():
        temp = []
        if atom.GetAtomicNum() == 6:
            for neighatom in atom.GetNeighbors():
                if neighatom.GetAtomicNum() == 6:
                    temp.append(0)
                elif neighatom.GetAtomicNum() == 1:
                    continue
                else:
                    temp.append(1)
            if sum(temp) == 0:
                AtomIndex.append(atom.GetIdx())

    return AtomIndex


###################################

Source File: test.py From xyz2mol with MIT License

5 votes

def test_smiles_from_xyz_files(filename, charge, answer):

    charged_fragments = True
    quick = True

    atoms, charge_read, coordinates = x2m.read_xyz_file(filename)

    mol = x2m.xyz2mol(atoms, coordinates, charge=charge)
    mol = Chem.RemoveHs(mol)

    smiles = Chem.MolToSmiles(mol)

    assert smiles == answer

    return

Source File: test.py From xyz2mol with MIT License

5 votes

def test_smiles_from_coord_vdw(smiles):

    # The answer
    mol = Chem.MolFromSmiles(smiles)
    charge = Chem.GetFormalCharge(mol)
    canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=False)

    # generate forcefield coordinates
    atoms, coordinates = generate_structure_from_smiles(smiles)

    # Generate molobj from atoms, charge and coordinates
    mol = x2m.xyz2mol(atoms, coordinates, charge=charge)

    # For this test, remove chira. clean and canonical
    Chem.Kekulize(mol)
    mol = Chem.RemoveHs(mol)
    Chem.RemoveStereochemistry(mol)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=False)

    # Please look away. A small hack that removes the explicit hydrogens
    mol = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(mol)

    assert smiles == canonical_smiles

    return

Source File: test.py From xyz2mol with MIT License

5 votes

def test_smiles_from_adjacent_matrix(smiles):

    charged_fragments = True
    quick = True

    # Cut apart the smiles
    mol = get_mol(smiles)
    atoms = get_atoms(mol)
    charge = Chem.GetFormalCharge(mol)
    adjacent_matrix = Chem.GetAdjacencyMatrix(mol)

    #
    mol = Chem.RemoveHs(mol)
    canonical_smiles = Chem.MolToSmiles(mol)

    # Define new molecule template from atoms
    new_mol = x2m.get_proto_mol(atoms)

    # reconstruct the molecule from adjacent matrix, atoms and total charge
    new_mol = x2m.AC2mol(new_mol, adjacent_matrix, atoms, charge, charged_fragments, quick)
    new_mol = Chem.RemoveHs(new_mol)
    new_mol_smiles = Chem.MolToSmiles(new_mol)

    assert new_mol_smiles == canonical_smiles

    return

Source File: PredX_MPNN.py From dl4chem-geometry with BSD 3-Clause "New" or "Revised" License

5 votes

def getRMS(self, prb_mol, ref_pos, useFF=False):

        def optimizeWithFF(mol):

            molf = Chem.AddHs(mol, addCoords=True)
            AllChem.MMFFOptimizeMolecule(molf)
            molf = Chem.RemoveHs(molf)

            return molf

        n_est = prb_mol.GetNumAtoms()

        ref_cf = Chem.rdchem.Conformer(n_est)
        for k in range(n_est):
            ref_cf.SetAtomPosition(k, ref_pos[k].tolist())

        ref_mol = copy.deepcopy(prb_mol)
        ref_mol.RemoveConformer(0)
        ref_mol.AddConformer(ref_cf)

        if useFF:
            try:
                res = AllChem.AlignMol(prb_mol, optimizeWithFF(ref_mol))
            except:
                res = AllChem.AlignMol(prb_mol, ref_mol)
        else:
            res = AllChem.AlignMol(prb_mol, ref_mol)

        return res

Source File: fsapt_helper.py From psikit with BSD 3-Clause "New" or "Revised" License

5 votes

def make_feat_data(mol, offset=1):
    res = []
    check_atom = set()
    nohmol = Chem.RemoveHs(mol)
    recap_res = Recap.RecapDecompose(nohmol)
    leaves = [key.replace('*','').replace('()','') for key in recap_res.GetLeaves().keys()]
    leaves = [leave.replace('[H]', '') for leave in leaves if leave != '[H]']
    leaves = sorted(leaves, key=lambda x: Chem.MolFromSmarts(x).GetNumAtoms(), reverse=True)
    if len(leaves) == 0:

        line = [i for i in range(mol.GetNumAtoms())]
        line = [str(n + offset) for n in line]
        line = [Chem.MolToSmiles(mol)] + line
        return [line]
    for leavsmi in leaves:
        leav = Chem.MolFromSmarts(leavsmi)
        matches = mol.GetSubstructMatches(leav)
        for i, match in enumerate(matches):
            line = list(match)
            if len(check_atom & set(line)) > 0:
                continue
            check_atom = check_atom|set(line)
            for idx in match:
                nei = get_neighbor_h(idx, mol)
                line += nei
            line = [str(j + offset) for j in line]
            line = [leavsmi + '_' + str(i)] + line
            res.append(line)
    return res

Source File: evaluate.py From GLN with MIT License

5 votes

def canonicalize(smiles):
    try:        
        tmp = Chem.MolFromSmiles(smiles)
    except:
        print('no mol')
        return smiles
    if tmp is None:
        return smiles
    tmp = Chem.RemoveHs(tmp)
    [a.ClearProp('molAtomMapNumber') for a in tmp.GetAtoms()]
    return Chem.MolToSmiles(tmp)

Source File: mol_utils.py From GLN with MIT License

5 votes

def cano_smiles(smiles):
    try:
        tmp = Chem.MolFromSmiles(smiles)
        if tmp is None:
            return None, smiles        
        tmp = Chem.RemoveHs(tmp)
        if tmp is None:
            return None, smiles
        [a.ClearProp('molAtomMapNumber') for a in tmp.GetAtoms()]
        return tmp, Chem.MolToSmiles(tmp)            
    except:
        return None, smiles

Source File: test_coulomb_matrices.py From deepchem with MIT License

5 votes

def test_coulomb_matrix_no_hydrogens(self):
    """
        Test hydrogen removal.
        """
    from rdkit import Chem
    mol = Chem.RemoveHs(self.mol)
    assert mol.GetNumAtoms() < self.mol.GetNumAtoms()
    f = cm.CoulombMatrix(
        max_atoms=mol.GetNumAtoms(), remove_hydrogens=True, upper_tri=True)
    rval = f([self.mol])  # use the version with hydrogens
    size = np.triu_indices(mol.GetNumAtoms())[0].size
    assert rval.shape == (1, mol.GetNumConformers(), size)

Source File: coulomb_matrices.py From deepchem with MIT License

5 votes

def coulomb_matrix(self, mol):
    """
    Generate Coulomb matrices for each conformer of the given molecule.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.
    """
    from rdkit import Chem
    if self.remove_hydrogens:
      mol = Chem.RemoveHs(mol)
    n_atoms = mol.GetNumAtoms()
    z = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    rval = []
    for conf in mol.GetConformers():
      d = self.get_interatomic_distances(conf)
      m = np.zeros((n_atoms, n_atoms))
      for i in range(mol.GetNumAtoms()):
        for j in range(mol.GetNumAtoms()):
          if i == j:
            m[i, j] = 0.5 * z[i]**2.4
          elif i < j:
            m[i, j] = (z[i] * z[j]) / d[i, j]
            m[j, i] = m[i, j]
          else:
            continue
      if self.randomize:
        for random_m in self.randomize_coulomb_matrix(m):
          random_m = pad_array(random_m, self.max_atoms)
          rval.append(random_m)
      else:
        m = pad_array(m, self.max_atoms)
        rval.append(m)
    rval = np.asarray(rval)
    return rval

Source File: dataset.py From 3DGCN with MIT License

5 votes

def tensorize(self, batch_x, batch_c):
        atom_tensor = np.zeros((len(batch_x), self.num_atoms, self.get_num_features()))
        adjm_tensor = np.zeros((len(batch_x), self.num_atoms, self.num_atoms))
        posn_tensor = np.zeros((len(batch_x), self.num_atoms, self.num_atoms, 3))

        for mol_idx, mol in enumerate(batch_x):
            Chem.RemoveHs(mol)
            mol_atoms = mol.GetNumAtoms()

            # Atom features
            atom_tensor[mol_idx, :mol_atoms, :] = self.get_atom_features(mol)

            # Adjacency matrix
            adjms = np.array(rdmolops.GetAdjacencyMatrix(mol), dtype="float")

            # Normalize adjacency matrix by D^(-1/2) * A_hat * D^(-1/2), Kipf et al. 2016
            adjms += np.eye(mol_atoms)
            degree = np.array(adjms.sum(1))
            deg_inv_sqrt = np.power(degree, -0.5)
            deg_inv_sqrt[np.isinf(deg_inv_sqrt)] = 0.
            deg_inv_sqrt = np.diag(deg_inv_sqrt)

            adjms = np.matmul(np.matmul(deg_inv_sqrt, adjms), deg_inv_sqrt)

            adjm_tensor[mol_idx, : mol_atoms, : mol_atoms] = adjms

            # Relative position matrix
            for atom_idx in range(mol_atoms):
                pos_c = batch_c[mol_idx][atom_idx]

                for neighbor_idx in range(mol_atoms):
                    pos_n = batch_c[mol_idx][neighbor_idx]

                    # Direction should be Neighbor -> Center
                    n_to_c = [pos_c[0] - pos_n[0], pos_c[1] - pos_n[1], pos_c[2] - pos_n[2]]
                    posn_tensor[mol_idx, atom_idx, neighbor_idx, :] = n_to_c

        return [atom_tensor, adjm_tensor, posn_tensor]

Source File: converter.py From 3DGCN with MIT License

4 votes

def optimize_conformer(idx, m, prop, algo="MMFF"):
    print("Calculating {}: {} ...".format(idx, Chem.MolToSmiles(m)))

    mol = Chem.AddHs(m)

    if algo == "ETKDG":
        # Landrum et al. DOI: 10.1021/acs.jcim.5b00654
        k = AllChem.EmbedMolecule(mol, AllChem.ETKDG())

        if k != 0:
            return None, None

    elif algo == "UFF":
        # Universal Force Field
        AllChem.EmbedMultipleConfs(mol, 50, pruneRmsThresh=0.5)
        try:
            arr = AllChem.UFFOptimizeMoleculeConfs(mol, maxIters=2000)
        except ValueError:
            return None, None

        if not arr:
            return None, None

        else:
            arr = AllChem.UFFOptimizeMoleculeConfs(mol, maxIters=20000)
            idx = np.argmin(arr, axis=0)[1]
            conf = mol.GetConformers()[idx]
            mol.RemoveAllConformers()
            mol.AddConformer(conf)

    elif algo == "MMFF":
        # Merck Molecular Force Field
        AllChem.EmbedMultipleConfs(mol, 50, pruneRmsThresh=0.5)
        try:
            arr = AllChem.MMFFOptimizeMoleculeConfs(mol, maxIters=2000)
        except ValueError:
            return None, None

        if not arr:
            return None, None

        else:
            arr = AllChem.MMFFOptimizeMoleculeConfs(mol, maxIters=20000)
            idx = np.argmin(arr, axis=0)[1]
            conf = mol.GetConformers()[idx]
            mol.RemoveAllConformers()
            mol.AddConformer(conf)

    mol = Chem.RemoveHs(mol)

    return mol, prop

Source File: test_rdkitfixer.py From oddt with BSD 3-Clause "New" or "Revised" License

4 votes

def test_add_missing_atoms():
    # add missing atom at tryptophan
    molfile = os.path.join(test_dir, '5dhh_missingatomTRP.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=True)
    mol = Chem.RemoveHs(mol, sanitize=False)

    assert mol.GetNumAtoms() == 26
    mol = PreparePDBMol(mol, add_missing_atoms=True)
    assert mol.GetNumAtoms() == 27

    atom = mol.GetAtomWithIdx(21)
    assert atom.GetAtomicNum() == 6
    info = atom.GetPDBResidueInfo()
    assert info.GetResidueName() == 'TRP'
    assert info.GetResidueNumber() == 175
    assert info.GetName().strip() == 'C9'
    assert atom.IsInRing()
    assert atom.GetIsAromatic()
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE

    # add whole ring to tyrosine
    molfile = os.path.join(test_dir, '3cx9_TYR.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=True)
    mol = Chem.RemoveHs(mol, sanitize=False)

    assert mol.GetNumAtoms() == 23
    mol = PreparePDBMol(mol, add_missing_atoms=True)
    assert mol.GetNumAtoms() == 29

    atom = mol.GetAtomWithIdx(17)
    assert atom.GetAtomicNum() == 6
    info = atom.GetPDBResidueInfo()
    assert info.GetResidueName() == 'TYR'
    assert info.GetResidueNumber() == 138
    assert info.GetName().strip() == 'C6'
    assert atom.IsInRing()
    assert atom.GetIsAromatic()
    assert Chem.SanitizeMol(mol) == Chem.SanitizeFlags.SANITIZE_NONE

    # missing protein backbone atoms
    molfile = os.path.join(test_dir, '5ar7_HIS.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=False)
    mol = Chem.RemoveHs(mol, sanitize=False)

    assert mol.GetNumAtoms() == 21
    assert mol.GetNumBonds() == 19
    mol = PreparePDBMol(mol, add_missing_atoms=True)
    assert mol.GetNumAtoms() == 25
    assert mol.GetNumBonds() == 25

    # missing nucleotide backbone atoms
    molfile = os.path.join(test_dir, '1bpx_missingBase.pdb')
    mol = Chem.MolFromPDBFile(molfile, sanitize=False)
    mol = Chem.RemoveHs(mol, sanitize=False)

    assert mol.GetNumAtoms() == 301
    assert mol.GetNumBonds() == 333
    mol = PreparePDBMol(mol, add_missing_atoms=True)
    assert mol.GetNumAtoms() == 328
    assert mol.GetNumBonds() == 366

Source File: converter.py From ARC with MIT License

4 votes

def to_rdkit_mol(mol, remove_h=False, sanitize=True):
    """
    Convert a molecular structure to an RDKit RDMol object. Uses
    `RDKit <http://rdkit.org/>`_ to perform the conversion.
    Perceives aromaticity.
    Adopted from rmgpy/molecule/converter.py

    Args:
        mol (Molecule): An RMG Molecule object for the conversion.
        remove_h (bool, optional): Whether to remove hydrogen atoms from the molecule, ``True`` to remove.
        sanitize (bool, optional): Whether to sanitize the RDKit molecule, ``True`` to sanitize.

    Returns:
        RDMol: An RDKit molecule object corresponding to the input RMG Molecule object.
    """
    atom_id_map = dict()

    # only manipulate a copy of ``mol``
    mol_copy = mol.copy(deep=True)
    if not mol_copy.atom_ids_valid():
        mol_copy.assign_atom_ids()
    for i, atom in enumerate(mol_copy.atoms):
        atom_id_map[atom.id] = i  # keeps the original atom order before sorting
    # mol_copy.sort_atoms()  # Sort the atoms before converting to ensure output is consistent between different runs
    atoms_copy = mol_copy.vertices

    rd_mol = Chem.rdchem.EditableMol(Chem.rdchem.Mol())
    for rmg_atom in atoms_copy:
        rd_atom = Chem.rdchem.Atom(rmg_atom.element.symbol)
        if rmg_atom.element.isotope != -1:
            rd_atom.SetIsotope(rmg_atom.element.isotope)
        rd_atom.SetNumRadicalElectrons(rmg_atom.radical_electrons)
        rd_atom.SetFormalCharge(rmg_atom.charge)
        if rmg_atom.element.symbol == 'C' and rmg_atom.lone_pairs == 1 and mol_copy.multiplicity == 1:
            # hard coding for carbenes
            rd_atom.SetNumRadicalElectrons(2)
        if not (remove_h and rmg_atom.symbol == 'H'):
            rd_mol.AddAtom(rd_atom)

    rd_bonds = Chem.rdchem.BondType
    orders = {'S': rd_bonds.SINGLE, 'D': rd_bonds.DOUBLE, 'T': rd_bonds.TRIPLE, 'B': rd_bonds.AROMATIC,
              'Q': rd_bonds.QUADRUPLE}
    # Add the bonds
    for atom1 in atoms_copy:
        for atom2, bond12 in atom1.edges.items():
            if bond12.is_hydrogen_bond():
                continue
            if atoms_copy.index(atom1) < atoms_copy.index(atom2):
                rd_mol.AddBond(atom_id_map[atom1.id], atom_id_map[atom2.id], orders[bond12.get_order_str()])

    # Make editable mol and rectify the molecule
    rd_mol = rd_mol.GetMol()
    if sanitize:
        Chem.SanitizeMol(rd_mol)
    if remove_h:
        rd_mol = Chem.RemoveHs(rd_mol, sanitize=sanitize)
    return rd_mol

Source File: chemistry.py From guacamol with MIT License

4 votes

def filter_and_canonicalize(smiles: str, holdout_set, holdout_fps, neutralization_rxns, tanimoto_cutoff=0.5,
                            include_stereocenters=False):
    """
    Args:
        smiles: the molecule to process
        holdout_set: smiles of the holdout set
        holdout_fps: ECFP4 fingerprints of the holdout set
        neutralization_rxns: neutralization rdkit reactions
        tanimoto_cutoff: Remove molecules with a higher ECFP4 tanimoto similarity than this cutoff from the set
        include_stereocenters: whether to keep stereocenters during canonicalization

    Returns:
        list with canonical smiles as a list with one element, or a an empty list. This is to perform a flatmap:
    """
    try:
        # Drop out if too long
        if len(smiles) > 200:
            return []
        mol = Chem.MolFromSmiles(smiles)
        # Drop out if invalid
        if mol is None:
            return []
        mol = Chem.RemoveHs(mol)

        # We only accept molecules consisting of H, B, C, N, O, F, Si, P, S, Cl, aliphatic Se, Br, I.
        metal_smarts = Chem.MolFromSmarts('[!#1!#5!#6!#7!#8!#9!#14!#15!#16!#17!#34!#35!#53]')

        has_metal = mol.HasSubstructMatch(metal_smarts)

        # Exclude molecules containing the forbidden elements.
        if has_metal:
            print(f'metal {smiles}')
            return []

        canon_smi = Chem.MolToSmiles(mol, isomericSmiles=include_stereocenters)

        # Drop out if too long canonicalized:
        if len(canon_smi) > 100:
            return []
        # Balance charges if unbalanced
        if canon_smi.count('+') - canon_smi.count('-') != 0:
            new_mol, changed = neutralise_charges(mol, reactions=neutralization_rxns)
            if changed:
                mol = new_mol
                canon_smi = Chem.MolToSmiles(mol, isomericSmiles=include_stereocenters)

        # Get most similar to holdout fingerprints, and exclude too similar molecules.
        max_tanimoto = highest_tanimoto_precalc_fps(mol, holdout_fps)
        if max_tanimoto < tanimoto_cutoff and canon_smi not in holdout_set:
            return [canon_smi]
        else:
            print("Exclude: {} {}".format(canon_smi, max_tanimoto))
    except Exception as e:
        print(e)
    return []

Python rdkit.Chem.RemoveHs() Examples