Python rdkit.Chem.rdMolDescriptors.GetMorganFingerprint() Examples
The following are 11 code examples of rdkit.Chem.rdMolDescriptors.GetMorganFingerprint().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rdkit.Chem.rdMolDescriptors, or try the search function.
Example #1
Source File: eval_mol2vec_results.py From deepchem with MIT License | 6 votes |
def main():
    """Build a summed word-vector embedding for glycine and print it.

    Loads word vectors from ``vec.txt`` (word2vec text format), collects the
    radius-0 Morgan identifiers of glycine via ``bitInfo``, looks each
    identifier up (as a string) in the model and sums the vectors into a
    single length-200 array.

    Any exception raised while fingerprinting or looking up vectors is
    printed and otherwise ignored; in that case no embedding is printed.
    """
    model = models.KeyedVectors.load_word2vec_format("vec.txt")
    embeddings = []
    # Using canonical smiles for glycine, as in original research paper
    mol = Chem.MolFromSmiles("C(C(=O)O)N")
    try:
        info = {}
        # Only the side effect matters here: bitInfo fills `info` with the
        # identifiers that are set in the fingerprint.
        rdMolDescriptors.GetMorganFingerprint(mol, 0, bitInfo=info)
        # NOTE(review): assumes the loaded vectors are 200-dimensional --
        # confirm against vec.txt.
        totalvec = np.zeros(200)
        for k in info:
            wordvec = model.wv[str(k)]
            totalvec = np.add(totalvec, wordvec)
        embeddings.append(totalvec)
    except Exception as e:
        print(e)
    # The try block may have failed before anything was appended; indexing
    # unconditionally would raise IndexError and mask the real error above.
    if embeddings:
        print(embeddings[0])
Example #2
Source File: mol_distance.py From ORGAN with GNU General Public License v2.0 | 6 votes |
def NP_score(mol, fscore=None):
    """Return a fragment-based score for ``mol``, damped outside [-4, 4].

    Parameters
    ----------
    mol : RDKit Mol
        Molecule to score. Must not be ``None``.
    fscore : mapping, optional
        Maps Morgan fingerprint identifiers to score contributions.
        When omitted, it is obtained from ``readNPModel()``.

    Returns
    -------
    float
        Sum of the contributions of the molecule's radius-2 Morgan
        identifiers (unknown identifiers contribute 0), divided by the atom
        count, with values beyond +/-4 compressed logarithmically.

    Raises
    ------
    ValueError
        If ``mol`` is ``None``.
    """
    # Validate before loading the model so an invalid molecule is rejected
    # without paying for (or depending on) readNPModel().
    if mol is None:
        raise ValueError('invalid molecule')
    if fscore is None:
        fscore = readNPModel()
    fp = rdMolDescriptors.GetMorganFingerprint(mol, 2)
    bits = fp.GetNonzeroElements()

    # calculating the score
    score = 0.
    for bit in bits:
        score += fscore.get(bit, 0)
    score /= float(mol.GetNumAtoms())

    # preventing score explosion for exotic molecules
    # (the two ranges are mutually exclusive, hence elif)
    if score > 4:
        score = 4. + math.log10(score - 4. + 1.)
    elif score < -4:
        score = -4. - math.log10(-4. - score + 1.)
    return score
Example #3
Source File: fingerprints.py From deepchem with MIT License | 5 votes |
def _featurize(self, mol):
    """
    Compute a circular (Morgan) fingerprint for one molecule.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.

    Returns
    -------
    When ``self.sparse`` is false, the result of
    ``GetMorganFingerprintAsBitVect`` with ``self.size`` bits. When it is
    true, the dict of non-zero fingerprint elements; if ``self.smiles`` is
    also set, each value is replaced by ``{'smiles': ..., 'count': ...}``
    describing the fragment behind that identifier.
    """
    from rdkit import Chem
    from rdkit.Chem import rdMolDescriptors

    # Dense case: a fixed-size bit vector, nothing else to do.
    if not self.sparse:
        return rdMolDescriptors.GetMorganFingerprintAsBitVect(
            mol,
            self.radius,
            nBits=self.size,
            useChirality=self.chiral,
            useBondTypes=self.bonds,
            useFeatures=self.features)

    # Sparse case: bitInfo records (atom, radius) pairs per identifier.
    atom_envs = {}
    sparse_fp = rdMolDescriptors.GetMorganFingerprint(
        mol,
        self.radius,
        useChirality=self.chiral,
        useBondTypes=self.bonds,
        useFeatures=self.features,
        bitInfo=atom_envs)
    counts = sparse_fp.GetNonzeroElements()  # convert to a dict

    if not self.smiles:
        return counts

    # generate SMILES for fragments, using the first recorded environment
    # of each identifier
    annotated = {}
    for fragment_id, count in counts.items():
        root, radius = atom_envs[fragment_id][0]
        env = Chem.FindAtomEnvironmentOfRadiusN(mol, radius, root)
        frag = Chem.PathToSubmol(mol, env)
        frag_smiles = Chem.MolToSmiles(frag)
        annotated[fragment_id] = {'smiles': frag_smiles, 'count': count}
    return annotated
Example #4
Source File: mol2vec.py From deepchem with MIT License | 5 votes |
def main():
    """Print the radius-1 Morgan identifiers of every molecule in an SDF tree.

    Walks ``/media/data/pubchem/SDF``, opens every file as a gzip stream and
    reads molecules with ``Chem.ForwardSDMolSupplier``. For each molecule
    that parses, the identifiers collected through ``bitInfo`` are written
    to stdout on one space-separated line. Molecules whose fingerprinting
    fails are skipped and reported on stderr.
    """
    import sys  # local import: only needed for error reporting here

    sdf_root_path = "/media/data/pubchem/SDF"
    for path, _dirs, filenames in os.walk(sdf_root_path):
        for filename in filenames:
            # This SDF file fails to parse with RDKit on Ubuntu 16.04
            if "Compound_102125001_102150000" in filename:
                continue
            # Join with the directory os.walk is currently visiting, not the
            # root: otherwise files inside subdirectories resolve to paths
            # that do not exist.
            filepath = os.path.join(path, filename)
            with gzip.open(filepath, 'rb') as myfile:
                suppl = Chem.ForwardSDMolSupplier(myfile)
                for mol in suppl:
                    if not mol:
                        continue
                    info = {}
                    try:
                        rdMolDescriptors.GetMorganFingerprint(
                            mol, 1, bitInfo=info)
                    except Exception as exc:
                        # Best effort: keep going, but leave a trace instead
                        # of dropping the molecule silently.
                        print("skipping molecule in %s: %s" % (filepath, exc),
                              file=sys.stderr)
                        continue
                    for k in info:
                        print(k, end=' ')
                    print()
Example #5
Source File: fingerprints.py From PADME with MIT License | 5 votes |
def _featurize(self, mol, smiles=None):
    """
    Compute a circular (Morgan) fingerprint for one molecule.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.
    smiles : optional
        Forwarded unchanged to ``ComparableFingerprint`` in the non-sparse
        case; not used in the sparse case.

    Returns
    -------
    When ``self.sparse`` is false, a ``ComparableFingerprint``. When it is
    true, the dict of non-zero fingerprint elements; if ``self.calc_smiles``
    is also set, each value is replaced by
    ``{'smiles': ..., 'count': ...}`` for the fragment behind that
    identifier.
    """
    # Dense case: delegate entirely to ComparableFingerprint.
    if not self.sparse:
        return ComparableFingerprint(
            mol,
            self.radius,
            nBits=self.size,
            useChirality=self.chiral,
            useBondTypes=self.bonds,
            useFeatures=self.features,
            smiles=smiles)

    # Sparse case: bitInfo records (atom, radius) pairs per identifier.
    atom_envs = {}
    sparse_fp = rdMolDescriptors.GetMorganFingerprint(
        mol,
        self.radius,
        useChirality=self.chiral,
        useBondTypes=self.bonds,
        useFeatures=self.features,
        bitInfo=atom_envs)
    counts = sparse_fp.GetNonzeroElements()  # convert to a dict

    if not self.calc_smiles:
        return counts

    # generate SMILES for fragments, using the first recorded environment
    # of each identifier
    annotated = {}
    for fragment_id, count in counts.items():
        root, radius = atom_envs[fragment_id][0]
        env = Chem.FindAtomEnvironmentOfRadiusN(mol, radius, root)
        frag = Chem.PathToSubmol(mol, env)
        frag_smiles = Chem.MolToSmiles(frag)
        annotated[fragment_id] = {'smiles': frag_smiles, 'count': count}
    return annotated
Example #6
Source File: npscorer.py From moses with MIT License | 5 votes |
def scoreMolWConfidence(mol, fscore):
    """Score a molecule and report how much of it the model recognised.

    Besides the NP likeness score, a confidence value in 0..1 is returned
    that describes which fraction of the molecule's fragments were found
    in ``fscore`` (1: all fragments were found).

    Returns namedtuple NPLikeness(nplikeness, confidence).
    Raises ValueError when ``mol`` is None.
    """
    if mol is None:
        raise ValueError('invalid molecule')

    fingerprint = rdMolDescriptors.GetMorganFingerprint(mol, 2)
    fragment_ids = fingerprint.GetNonzeroElements()

    # accumulate contributions of the fragments the model knows about
    total = 0.0
    hits = 0
    for fragment_id in fragment_ids:
        if fragment_id not in fscore:
            continue
        hits += 1
        total += fscore[fragment_id]
    total /= float(mol.GetNumAtoms())
    confidence = float(hits / len(fragment_ids))

    # compress values beyond +/-4 so exotic molecules do not explode
    if total > 4:
        total = 4. + math.log10(total - 4. + 1.)
    elif total < -4:
        total = -4. - math.log10(-4. - total + 1.)

    NPLikeness = namedtuple("NPLikeness", "nplikeness,confidence")
    return NPLikeness(total, confidence)
Example #7
Source File: sascorer.py From icml18-jtnn with MIT License | 4 votes |
def calculateScore(m):
    """Return a score for molecule ``m`` clamped to the range 1..10.

    The raw value is the sum of three parts: the count-weighted mean of
    per-fragment scores (unknown fragments count as -4), a negative
    penalty term built from size, stereo centres, spiro atoms, bridgeheads
    and macrocycles, and a fingerprint-density correction. The raw value is
    then mapped onto 1..10 and smoothed near the upper end.
    """
    if _fscores is None:
        readFragmentScores()

    # --- fragment score ---
    # 2 is the *radius* of the circular fingerprint
    fingerprint = rdMolDescriptors.GetMorganFingerprint(m, 2)
    fragment_counts = fingerprint.GetNonzeroElements()
    fragment_score = 0.
    n_fragments = 0
    for fragment_id, count in iteritems(fragment_counts):
        n_fragments += count
        fragment_score += _fscores.get(fragment_id, -4) * count
    fragment_score /= n_fragments

    # --- features score ---
    n_atoms = m.GetNumAtoms()
    n_chiral = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ring_info = m.GetRingInfo()
    n_bridgeheads, n_spiro = numBridgeheadsAndSpiro(m, ring_info)
    n_macrocycles = 0
    for ring in ring_info.AtomRings():
        if len(ring) > 8:
            n_macrocycles += 1

    size_penalty = n_atoms**1.005 - n_atoms
    stereo_penalty = math.log10(n_chiral + 1)
    spiro_penalty = math.log10(n_spiro + 1)
    bridge_penalty = math.log10(n_bridgeheads + 1)
    # This differs from the paper, which defines:
    #   macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are
    # present
    macrocycle_penalty = math.log10(2) if n_macrocycles > 0 else 0.

    feature_score = (0. - size_penalty - stereo_penalty - spiro_penalty
                     - bridge_penalty - macrocycle_penalty)

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    density_score = 0.
    if n_atoms > len(fragment_counts):
        density_score = math.log(float(n_atoms) / len(fragment_counts)) * .5

    sascore = fragment_score + feature_score + density_score

    # need to transform "raw" value into scale between 1 and 10
    raw_min = -4.0
    raw_max = 2.5
    sascore = 11. - (sascore - raw_min + 1) / (raw_max - raw_min) * 9.
    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)
    if sascore > 10.:
        sascore = 10.0
    elif sascore < 1.:
        sascore = 1.0

    return sascore
Example #8
Source File: sascorer.py From icml18-jtnn with MIT License | 4 votes |
def calculateScore(m):
    """Compute a 1..10 score for molecule ``m``.

    Three contributions are added: a fragment term (count-weighted average
    of the stored fragment scores, -4 for fragments without one), a
    penalty term (size, stereo centres, spiro atoms, bridgeheads,
    macrocycles), and a correction applied when there are more atoms than
    distinct fingerprint elements. The sum is rescaled to 1..10 with a
    logarithmic smoothing above 8.
    """
    if _fscores is None:
        readFragmentScores()

    # fragment score
    # (the 2 below is the *radius* of the circular fingerprint)
    elements = rdMolDescriptors.GetMorganFingerprint(m, 2).GetNonzeroElements()
    weighted_sum = 0.
    weight = 0
    for bit_id, occurrences in iteritems(elements):
        weight += occurrences
        weighted_sum += _fscores.get(bit_id, -4) * occurrences
    weighted_sum /= weight

    # features score
    atom_count = m.GetNumAtoms()
    chiral_count = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    rings = m.GetRingInfo()
    bridgehead_count, spiro_count = numBridgeheadsAndSpiro(m, rings)
    macrocycle_count = sum(1 for ring in rings.AtomRings() if len(ring) > 8)

    # This differs from the paper, which defines:
    #   macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are
    # present
    macrocycle_term = 0.
    if macrocycle_count > 0:
        macrocycle_term = math.log10(2)

    penalties = (
        atom_count**1.005 - atom_count,     # size
        math.log10(chiral_count + 1),       # stereo centres
        math.log10(spiro_count + 1),        # spiro atoms
        math.log10(bridgehead_count + 1),   # bridgeheads
        macrocycle_term,                    # macrocycles
    )
    # subtract one by one, in this order, starting from 0.
    penalty_score = 0.
    for penalty in penalties:
        penalty_score = penalty_score - penalty

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    if atom_count > len(elements):
        symmetry_score = math.log(float(atom_count) / len(elements)) * .5
    else:
        symmetry_score = 0.

    raw = weighted_sum + penalty_score + symmetry_score

    # need to transform "raw" value into scale between 1 and 10
    lower = -4.0
    upper = 2.5
    scaled = 11. - (raw - lower + 1) / (upper - lower) * 9.
    # smooth the 10-end
    if scaled > 8.:
        scaled = 8. + math.log(scaled + 1. - 9.)

    if scaled > 10.:
        return 10.0
    if scaled < 1.:
        return 1.0
    return scaled
Example #9
Source File: sascorer.py From sdvae with MIT License | 4 votes |
def calculateScore(m):
    """Score molecule ``m`` on a scale from 1 to 10.

    Steps, in order:
      1. fragment score: stored per-fragment scores (default -4) averaged
         over all fragment occurrences of the radius-2 Morgan fingerprint;
      2. features score: negative sum of size, stereo, spiro, bridgehead
         and macrocycle penalties;
      3. density correction when atoms outnumber distinct fragments;
      4. rescaling of the total to 1..10, smoothing and clamping.
    """
    if _fscores is None:
        readFragmentScores()

    # 1. fragment score
    morgan = rdMolDescriptors.GetMorganFingerprint(m, 2)  # 2 is the *radius*
    frag_hits = morgan.GetNonzeroElements()
    part_fragments = 0.
    hit_total = 0
    for frag_key, hit_count in iteritems(frag_hits):
        hit_total += hit_count
        part_fragments += _fscores.get(frag_key, -4) * hit_count
    part_fragments /= hit_total

    # 2. features score
    num_atoms = m.GetNumAtoms()
    num_stereo = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ring_data = m.GetRingInfo()
    num_bridge, num_spiro = numBridgeheadsAndSpiro(m, ring_data)
    num_macro = len([r for r in ring_data.AtomRings() if len(r) > 8])

    pen_size = num_atoms**1.005 - num_atoms
    pen_stereo = math.log10(num_stereo + 1)
    pen_spiro = math.log10(num_spiro + 1)
    pen_bridge = math.log10(num_bridge + 1)
    pen_macro = 0.
    # ---------------------------------------
    # This differs from the paper, which defines:
    #   macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are
    # present
    if num_macro > 0:
        pen_macro = math.log10(2)

    part_features = 0. - pen_size - pen_stereo - pen_spiro - pen_bridge - pen_macro

    # 3. correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    part_density = 0.
    distinct_frags = len(frag_hits)
    if num_atoms > distinct_frags:
        part_density = math.log(float(num_atoms) / distinct_frags) * .5

    # 4. need to transform "raw" value into scale between 1 and 10
    total = part_fragments + part_features + part_density
    floor_raw, ceil_raw = -4.0, 2.5
    total = 11. - (total - floor_raw + 1) / (ceil_raw - floor_raw) * 9.

    # smooth the 10-end
    if total > 8.:
        total = 8. + math.log(total + 1. - 9.)

    # clamp into [1, 10]
    if total > 10.:
        total = 10.0
    elif total < 1.:
        total = 1.0
    return total
Example #10
Source File: sascorer.py From sdvae with MIT License | 4 votes |
def calculateScore(m):
    """Return the 1..10 score of molecule ``m``.

    Combines a fragment-based term, a structural-penalty term and a
    fingerprint-density correction, then rescales the result so that it
    lies between 1 and 10 (values above 8 are log-smoothed before the
    final clamp).
    """
    if _fscores is None:
        readFragmentScores()

    # ---- fragment term -------------------------------------------------
    # radius-2 circular fingerprint; keys are fragment ids, values counts
    occurrences = rdMolDescriptors.GetMorganFingerprint(m, 2).GetNonzeroElements()
    frag_term = 0.
    occurrence_sum = 0
    for key, times in iteritems(occurrences):
        occurrence_sum += times
        # fragments without a stored score contribute -4 per occurrence
        frag_term += _fscores.get(key, -4) * times
    frag_term = frag_term / occurrence_sum

    # ---- structural penalties -----------------------------------------
    heavy = m.GetNumAtoms()
    stereo_centres = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    info = m.GetRingInfo()
    bridgeheads, spiros = numBridgeheadsAndSpiro(m, info)
    big_rings = 0
    for atoms_in_ring in info.AtomRings():
        big_rings += 1 if len(atoms_in_ring) > 8 else 0

    p_size = heavy**1.005 - heavy
    p_stereo = math.log10(stereo_centres + 1)
    p_spiro = math.log10(spiros + 1)
    p_bridge = math.log10(bridgeheads + 1)
    # This differs from the paper, which defines:
    #   macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are
    # present
    p_macro = math.log10(2) if big_rings > 0 else 0.

    penalty_term = 0. - p_size - p_stereo - p_spiro - p_bridge - p_macro

    # ---- density correction -------------------------------------------
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    density_term = 0.
    if heavy > len(occurrences):
        density_term = math.log(float(heavy) / len(occurrences)) * .5

    # ---- rescale to 1..10 ---------------------------------------------
    # need to transform "raw" value into scale between 1 and 10
    raw_value = frag_term + penalty_term + density_term
    scale_lo = -4.0
    scale_hi = 2.5
    result = 11. - (raw_value - scale_lo + 1) / (scale_hi - scale_lo) * 9.

    # smooth the 10-end
    if result > 8.:
        result = 8. + math.log(result + 1. - 9.)

    if result > 10.:
        result = 10.0
    elif result < 1.:
        result = 1.0
    return result
Example #11
Source File: sascorer.py From iclr19-graph2graph with MIT License | 4 votes |
def calculateScore(m):
    """Calculate a score between 1 and 10 for molecule ``m``.

    score = fragment score + features score + density correction, where the
    fragment score averages stored fragment scores over the molecule's
    radius-2 Morgan fragments (-4 when a fragment has no stored score) and
    the features score subtracts size, stereo, spiro, bridgehead and
    macrocycle penalties. The sum is then mapped onto 1..10.
    """
    if _fscores is None:
        readFragmentScores()

    # fragment score
    fp = rdMolDescriptors.GetMorganFingerprint(m, 2)  # <- 2 is the *radius*
    fp_elements = fp.GetNonzeroElements()
    score_fragments = 0.
    fragment_occurrences = 0
    for element_id, n_times in iteritems(fp_elements):
        fragment_occurrences += n_times
        contribution = _fscores.get(element_id, -4) * n_times
        score_fragments += contribution
    score_fragments /= fragment_occurrences

    # features score
    n_atoms = m.GetNumAtoms()
    n_chiral_centers = len(
        Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ring_info = m.GetRingInfo()
    n_bridgeheads, n_spiro = numBridgeheadsAndSpiro(m, ring_info)
    ring_sizes = [len(ring) for ring in ring_info.AtomRings()]
    n_macrocycles = len([size for size in ring_sizes if size > 8])

    penalty_size = n_atoms**1.005 - n_atoms
    penalty_stereo = math.log10(n_chiral_centers + 1)
    penalty_spiro = math.log10(n_spiro + 1)
    penalty_bridge = math.log10(n_bridgeheads + 1)
    # This differs from the paper, which defines:
    #   macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are
    # present
    if n_macrocycles > 0:
        penalty_macrocycle = math.log10(2)
    else:
        penalty_macrocycle = 0.

    score_features = (0.
                      - penalty_size
                      - penalty_stereo
                      - penalty_spiro
                      - penalty_bridge
                      - penalty_macrocycle)

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    n_elements = len(fp_elements)
    if n_atoms > n_elements:
        score_density = math.log(float(n_atoms) / n_elements) * .5
    else:
        score_density = 0.

    sascore = score_fragments + score_features + score_density

    # need to transform "raw" value into scale between 1 and 10
    range_min = -4.0
    range_max = 2.5
    sascore = 11. - (sascore - range_min + 1) / (range_max - range_min) * 9.

    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)

    # final clamp
    if sascore > 10.:
        return 10.0
    if sascore < 1.:
        return 1.0
    return sascore