Python rdkit.Chem.AllChem.GetMorganFingerprint() Examples
The following are 30 code examples of rdkit.Chem.AllChem.GetMorganFingerprint().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rdkit.Chem.AllChem, or try the search function.
Example #1
Source File: chemTopicModel.py From CheTo with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _generateFPs(mol, fragmentMethod='Morgan'):
    """Fragment ``mol`` with the selected method.

    Supported values of ``fragmentMethod`` are ``'Morgan'``, ``'RDK'`` and
    ``'Brics'``.  Any other value prints a message and yields ``None`` as
    the fragment collection.

    Returns a ``(fp, aBits)`` pair: the fragment collection and the
    bit-information dictionary (left empty for the BRICS and unknown cases).
    """
    if fragmentMethod == 'Morgan':
        # circular Morgan fingerprint fragmentation; a simpler atom
        # invariant than usual is supplied via utilsFP
        morgan_info = {}
        morgan_fp = AllChem.GetMorganFingerprint(
            mol,
            radius=2,
            invariants=utilsFP.generateAtomInvariant(mol),
            bitInfo=morgan_info,
        )
        environments = utilsFP.getMorganEnvironment(mol, morgan_info, fp=morgan_fp, minRad=2)
        return morgan_fp.GetNonzeroElements(), environments

    if fragmentMethod == 'RDK':
        # path-based RDKit fingerprint fragmentation; the bit info dict is
        # filled in place by the fingerprint call
        path_info = {}
        path_fp = AllChem.UnfoldedRDKFingerprintCountBased(mol, maxPath=5, minPath=3, bitInfo=path_info)
        return path_fp.GetNonzeroElements(), path_info

    if fragmentMethod == 'Brics':
        # final BRICS fragmentation (= smallest possible BRICS fragments of a molecule)
        broken = BRICS.BreakBRICSBonds(mol)
        fragment_smiles = _prepBRICSSmiles(broken)
        return Counter(fragment_smiles.split('.')), {}

    print("Unknown fragment method")
    return None, {}

# this function is not part of the class due to parallelisation
# generate the fragments of a molecule, return a map with moleculeID and fragment dict
Example #2
Source File: mol_utils.py From GLN with MIT License | 6 votes |
def new_mol(self, name):
    """Build a ``MolGraph`` for ``name``, or return ``None`` if it cannot be parsed.

    ``name`` is parsed as SMILES when ``self.sanitized`` is truthy and as
    SMARTS otherwise.  When ``self.fp_degree`` is positive, the Morgan
    fingerprint identifiers (and, if ``self.fp_info`` is truthy, the bit
    info dictionary) are attached to the graph.
    """
    parse = Chem.MolFromSmiles if self.sanitized else Chem.MolFromSmarts
    mol = parse(name)
    if mol is None:
        return None

    graph = MolGraph(name, self.sanitized, mol=mol)
    if self.fp_degree > 0:
        # only collect bit info when requested; None disables it
        bit_info = {} if self.fp_info else None
        fingerprint = AllChem.GetMorganFingerprint(
            mol, self.fp_degree, bitInfo=bit_info, invariants=self._get_inv(mol))
        graph.fingerprints = list(fingerprint.GetNonzeroElements().keys())
        graph.fp_info = bit_info
    return graph
Example #3
Source File: vectorizers.py From Deep-Drug-Coder with MIT License | 6 votes |
def transform_mol(self, mol, misses=False):
    """Transform ``mol`` into a dense array indexed by the fitted keys.

    :parameter mol: the RDKit molecule to be transformed
    :parameter misses: whether to also return the number of key misses
        for the molecule
    :returns: the dense array, or ``(array, miss_count)`` when ``misses``
        is truthy
    :raises AssertionError: if ``self.keys`` is not a numpy array
    """
    assert isinstance(self.keys, np.ndarray), "keys are not defined or is not an np.array, has the .fit(mols) function been used?"

    # Get fingerprint as a dictionary of identifier -> value
    fp = AllChem.GetMorganFingerprint(mol, self.radius)
    fp_d = fp.GetNonzeroElements()

    # Prepare the array, and set the values
    arr = np.zeros((self.dims,))
    _misses = 0
    for key, value in fp_d.items():
        # One comparison pass serves both the membership test and the
        # assignment (previously ``key in self.keys`` scanned a second time).
        hit = self.keys == key
        if hit.any():
            arr[hit] = value
        else:
            _misses += 1

    if misses:
        return arr, _misses
    return arr
Example #4
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 6 votes |
def NP_score(smile):
    """Score a SMILES string with the ``NP_model`` fragment table.

    The per-fragment contributions of the radius-2 Morgan fingerprint are
    summed, divided by the atom count, dampened outside [-4, 4], remapped
    with ``remap(score, -3, 1)`` and clipped to [0, 1].
    """
    mol = Chem.MolFromSmiles(smile)
    fragment_ids = Chem.GetMorganFingerprint(mol, 2).GetNonzeroElements()

    # calculating the score: fragments missing from the model contribute 0
    score = sum((NP_model.get(fragment, 0) for fragment in fragment_ids), 0.)
    score /= float(mol.GetNumAtoms())

    # preventing score explosion for exotic molecules
    if score > 4:
        score = 4. + math.log10(score - 4. + 1.)
    elif score < -4:
        score = -4. - math.log10(-4. - score + 1.)

    return np.clip(remap(score, -3, 1), 0.0, 1.0)
Example #5
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 6 votes |
def CalculateMorganFingerprint(mol, radius=2):
    """
    #################################################################
    Calculate the Morgan fingerprint of a molecule.

    Usage:

        result=CalculateMorganFingerprint(mol)

        Input: mol is a molecule object.

        radius is the fingerprint radius.

        Output: result is a 3-tuple.

        The first element is the value of GetLength() on the fingerprint.

        The second is the dict returned by GetNonzeroElements().

        The third is the fingerprint object itself, usable for
        similarity calculations.
    #################################################################
    """
    fingerprint = AllChem.GetMorganFingerprint(mol, radius)
    length = fingerprint.GetLength()
    nonzero = fingerprint.GetNonzeroElements()
    return length, nonzero, fingerprint
Example #6
Source File: scoring_functions.py From GB-GA with MIT License | 5 votes |
def get_ECFP4(mol):
    """Return the Morgan fingerprint of ``mol`` computed with radius 2."""
    fingerprint = AllChem.GetMorganFingerprint(mol, 2)
    return fingerprint
Example #7
Source File: drd2_scorer.py From iclr19-graph2graph with MIT License | 5 votes |
def fingerprints_from_mol(mol):
    """Fold a radius-3 feature Morgan fingerprint into a (1, 2048) int32 array.

    Each nonzero identifier is mapped to column ``identifier % 2048`` and
    its value is added there, so colliding identifiers accumulate.
    """
    size = 2048
    folded = np.zeros((1, size), np.int32)
    nonzero = AllChem.GetMorganFingerprint(
        mol, 3, useCounts=True, useFeatures=True).GetNonzeroElements()
    for identifier, count in nonzero.items():
        folded[0, identifier % size] += int(count)
    return folded
Example #8
Source File: scoring_functions.py From GB-GA with MIT License | 5 votes |
def get_ECFP6(mol):
    """Return the Morgan fingerprint of ``mol`` computed with radius 3."""
    fingerprint = AllChem.GetMorganFingerprint(mol, 3)
    return fingerprint
Example #9
Source File: scoring_functions.py From GB-GA with MIT License | 5 votes |
def get_FCFP4(mol):
    """Return the feature-based Morgan fingerprint of ``mol`` with radius 2."""
    fingerprint = AllChem.GetMorganFingerprint(mol, 2, useFeatures=True)
    return fingerprint
Example #10
Source File: scoring_functions.py From GB-GA with MIT License | 5 votes |
def get_FCFP6(mol):
    """Return the feature-based Morgan fingerprint of ``mol`` with radius 3."""
    fingerprint = AllChem.GetMorganFingerprint(mol, 3, useFeatures=True)
    return fingerprint
Example #11
Source File: fingerprints.py From guacamol with MIT License | 5 votes |
def get_ECFP4(self, mol: Mol):
    """Return the Morgan fingerprint of ``mol`` computed with radius 2."""
    fingerprint = AllChem.GetMorganFingerprint(mol, 2)
    return fingerprint
Example #12
Source File: fingerprints.py From guacamol with MIT License | 5 votes |
def get_ECFP6(self, mol: Mol):
    """Return the Morgan fingerprint of ``mol`` computed with radius 3."""
    fingerprint = AllChem.GetMorganFingerprint(mol, 3)
    return fingerprint
Example #13
Source File: fingerprints.py From guacamol with MIT License | 5 votes |
def get_FCFP4(self, mol: Mol):
    """Return the feature-based Morgan fingerprint of ``mol`` with radius 2."""
    fingerprint = AllChem.GetMorganFingerprint(mol, 2, useFeatures=True)
    return fingerprint
Example #14
Source File: fingerprints.py From guacamol with MIT License | 5 votes |
def get_FCFP6(self, mol: Mol):
    """Return the feature-based Morgan fingerprint of ``mol`` with radius 3."""
    fingerprint = AllChem.GetMorganFingerprint(mol, 3, useFeatures=True)
    return fingerprint
Example #15
Source File: metric.py From DrugEx with MIT License | 5 votes |
def diversity(fake_path, real_path=None, is_active=False):
    """Measure molecular diversity as Tanimoto distance on radius-3 Morgan fingerprints.

    Arguments:
        fake_path (str): path of the table of molecules whose diversity is measured.
        real_path (str, optional): path of a reference table. When given, each
            molecule's value is one minus its minimum similarity to the
            reference set (inter-diversity); otherwise it is one minus its mean
            similarity to the filtered input set itself (intra-diversity).
        is_active (bool, optional): when True, keep only rows with SCORE > 0.5
            (input) and PCHEMBL_VALUE >= 6.5 (reference); when False the
            thresholds are 0. (Default: False)

    Returns:
        df (DataFrame): the filtered, de-duplicated input table with an added
            DIST column.
    """
    def _fingerprints(smiles_column):
        # one radius-3 Morgan fingerprint per SMILES entry, in row order
        return [AllChem.GetMorganFingerprint(Chem.MolFromSmiles(smi), 3)
                for smi in smiles_column]

    score_cutoff = 0.5 if is_active else 0
    fake = pd.read_table(fake_path)
    fake = fake[fake.SCORE > score_cutoff]
    fake = fake.drop_duplicates(subset='CANONICAL_SMILES')
    fake_fps = _fingerprints(fake.CANONICAL_SMILES)

    if real_path:
        activity_cutoff = 6.5 if is_active else 0
        real = pd.read_table(real_path)
        real = real[real.PCHEMBL_VALUE >= activity_cutoff]
        real_fps = _fingerprints(real.CANONICAL_SMILES)
        aggregate = np.min
    else:
        real_fps = fake_fps
        aggregate = np.mean

    similarities = [aggregate(DataStructs.BulkTanimotoSimilarity(fp, real_fps))
                    for fp in fake_fps]
    fake['DIST'] = 1 - np.array(similarities)
    return fake
Example #16
Source File: similarity.py From chemprop with MIT License | 5 votes |
def morgan_similarity(smiles_1: List[str], smiles_2: List[str], radius: int, sample_rate: float):
    """
    Determines the similarity between the morgan fingerprints of two lists of smiles strings.

    Prints the mean, standard deviation, minimum, maximum and deciles of the
    pairwise Dice similarities; nothing is returned.

    :param smiles_1: A list of smiles strings.
    :param smiles_2: A list of smiles strings.
    :param radius: The radius of the morgan fingerprints.
    :param sample_rate: Rate at which to sample pairs of molecules for Morgan similarity (to reduce time).
    """
    # Sample to improve speed
    if sample_rate < 1.0:
        num_pairs = len(smiles_1) * len(smiles_2)
        sample_size = math.ceil(math.sqrt(sample_rate * num_pairs))
        sample_smiles_1 = np.random.choice(smiles_1, size=sample_size, replace=True)
        sample_smiles_2 = np.random.choice(smiles_2, size=sample_size, replace=True)
    else:
        sample_smiles_1, sample_smiles_2 = smiles_1, smiles_2

    # Parse and fingerprint every molecule once; doing it inside the pair
    # loop repeated the same work len(other list) times per molecule.
    fps_1 = [AllChem.GetMorganFingerprint(Chem.MolFromSmiles(smile), radius) for smile in sample_smiles_1]
    fps_2 = [AllChem.GetMorganFingerprint(Chem.MolFromSmiles(smile), radius) for smile in sample_smiles_2]

    # The progress total is the real number of pairs iterated (an int), not
    # the fractional sample_rate * num_pairs estimate.
    total_pairs = len(fps_1) * len(fps_2)

    # Compute similarities
    similarities = np.array([
        DataStructs.DiceSimilarity(fp_1, fp_2)
        for fp_1, fp_2 in tqdm(product(fps_1, fps_2), total=total_pairs)
    ])

    # Print results
    print()
    print(f'Average dice similarity = {np.mean(similarities):.4f} +/- {np.std(similarities):.4f}')
    print(f'Minimum dice similarity = {np.min(similarities):.4f}')
    print(f'Maximum dice similarity = {np.max(similarities):.4f}')
    print()
    print('Percentiles for dice similarity')
    print(' | '.join([f'{i}% = {np.percentile(similarities, i):.4f}' for i in range(0, 101, 10)]))
Example #17
Source File: 2_to_fingerprint.py From mhfp with MIT License | 5 votes |
def convert(subset):
    """Write three fingerprint files for one ChEMBL subset.

    Reads the first column of ``/cluster/chembl/chembl.<subset>.smi`` as
    SMILES and writes, one comma-separated line per parsable molecule, the
    ``.mhfp6``, ``.mhecfp4`` and ``.ecfp4`` files next to it.
    """
    base = '/cluster/chembl/chembl.' + str(subset)
    actives = pd.read_csv(base + '.smi', sep=' ', usecols=[0], header=None)
    mh = MHFPEncoder()

    def _dump(suffix, encode):
        # One output line per molecule that RDKit could parse; rows whose
        # SMILES fail to parse are skipped.
        with open(base + suffix, 'w+') as out:
            for _, row in actives.iterrows():
                mol = AllChem.MolFromSmiles(row[0])
                if mol:
                    out.write(','.join(map(str, encode(mol))) + '\n')

    _dump('.mhfp6', lambda mol: mh.encode_mol(mol))
    _dump('.mhecfp4', lambda mol: mh.from_sparse_array(
        [*AllChem.GetMorganFingerprint(mol, 2).GetNonzeroElements()]))
    _dump('.ecfp4', lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048))
Example #18
Source File: drd2_scorer.py From hgraph2graph with MIT License | 5 votes |
def fingerprints_from_mol(mol):
    """Return a (1, 2048) int32 array folded from a radius-3 feature Morgan fingerprint.

    Every nonzero identifier contributes its value to column
    ``identifier % 2048``; identifiers that collide are summed.
    """
    size = 2048
    sparse = AllChem.GetMorganFingerprint(mol, 3, useCounts=True, useFeatures=True)
    dense = np.zeros((1, size), np.int32)
    for identifier, count in sparse.GetNonzeroElements().items():
        column = identifier % size
        dense[0, column] += int(count)
    return dense
Example #19
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateFCFP6Fingerprint(mol, radius=3, nBits=1024):
    """
    #################################################################
    Calculate FCFP6 (feature-based Morgan fingerprint, default radius 3).

    Usage:

        result=CalculateFCFP6Fingerprint(mol)

        Input: mol is a molecule object.

        radius is the fingerprint radius.

        nBits is the length of the bit vector.

        Output: result is a 3-tuple.

        The first element is the bit vector as a tuple.

        The second is the dict returned by GetNonzeroElements() on the
        unfolded fingerprint.

        The third is the unfolded fingerprint object, usable for
        similarity calculations.
    #################################################################
    """
    unfolded = AllChem.GetMorganFingerprint(mol, radius, useFeatures=True)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits, useFeatures=True)
    bits = tuple(bit_vector)
    return bits, unfolded.GetNonzeroElements(), unfolded

################################################################
Example #20
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateFCFP2Fingerprint(mol, radius=1, nBits=1024):
    """
    #################################################################
    Calculate FCFP2 (feature-based Morgan fingerprint, default radius 1).

    Usage:

        result=CalculateFCFP2Fingerprint(mol)

        Input: mol is a molecule object.

        radius is the fingerprint radius.

        nBits is the length of the bit vector.

        Output: result is a 3-tuple.

        The first element is the bit vector as a tuple.

        The second is the dict returned by GetNonzeroElements() on the
        unfolded fingerprint.

        The third is the unfolded fingerprint object, usable for
        similarity calculations.
    #################################################################
    """
    unfolded = AllChem.GetMorganFingerprint(mol, radius, useFeatures=True)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits, useFeatures=True)
    bits = tuple(bit_vector)
    return bits, unfolded.GetNonzeroElements(), unfolded
Example #21
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateECFP6Fingerprint(mol, radius=3):
    """
    #################################################################
    Calculate ECFP6 (Morgan fingerprint, default radius 3).

    Usage:

        result=CalculateECFP6Fingerprint(mol)

        Input: mol is a molecule object.

        radius is the fingerprint radius.

        Output: result is a 3-tuple.

        The first element is the 1024-bit vector as a tuple.

        The second is the dict returned by GetNonzeroElements() on the
        unfolded fingerprint.

        The third is the unfolded fingerprint object, usable for
        similarity calculations.
    #################################################################
    """
    unfolded = AllChem.GetMorganFingerprint(mol, radius)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=1024)
    bits = tuple(bit_vector)
    return bits, unfolded.GetNonzeroElements(), unfolded
Example #22
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateECFP4Fingerprint(mol, radius=2):
    """
    #################################################################
    Calculate ECFP4 (Morgan fingerprint, default radius 2).

    Usage:

        result=CalculateECFP4Fingerprint(mol)

        Input: mol is a molecule object.

        radius is the fingerprint radius.

        Output: result is a 3-tuple.

        The first element is the 1024-bit vector as a tuple.

        The second is the dict returned by GetNonzeroElements() on the
        unfolded fingerprint.

        The third is the unfolded fingerprint object, usable for
        similarity calculations.
    #################################################################
    """
    unfolded = AllChem.GetMorganFingerprint(mol, radius)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=1024)
    bits = tuple(bit_vector)
    return bits, unfolded.GetNonzeroElements(), unfolded
Example #23
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateECFP2Fingerprint(mol, radius=1):
    """
    #################################################################
    Calculate ECFP2 (Morgan fingerprint, default radius 1).

    Usage:

        result=CalculateECFP2Fingerprint(mol)

        Input: mol is a molecule object.

        radius is the fingerprint radius.

        Output: result is a 3-tuple.

        The first element is the 1024-bit vector as a tuple.

        The second is the dict returned by GetNonzeroElements() on the
        unfolded fingerprint.

        The third is the unfolded fingerprint object, usable for
        similarity calculations.
    #################################################################
    """
    unfolded = AllChem.GetMorganFingerprint(mol, radius)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=1024)
    bits = tuple(bit_vector)
    return bits, unfolded.GetNonzeroElements(), unfolded
Example #24
Source File: features.py From mol2vec with BSD 3-Clause "New" or "Revised" License | 5 votes |
def mol2alt_sentence(mol, radius):
    """Return the alternating 'sentence' of Morgan identifiers for ``mol``.

    Same idea as mol2sentence(), except only the alternating sentence is
    produced: for each atom (in the atom order of ``mol``) the identifier
    at radius 0, then radius 1, and so on up to ``int(radius)``.

    NOTE: Words are ALWAYS ordered according to atom order in the input mol object.
    NOTE: (atom, radius) slots for which the fingerprint reported no
          identifier are skipped; the filter is a truthiness test, so a
          falsy identifier would be skipped as well.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
    radius : float
        Fingerprint radius

    Returns
    -------
    list
        identifiers as strings, alternating over radii for each atom
    """
    radii = list(range(int(radius) + 1))

    bit_info = {}
    # the call is made only for its side effect of filling bit_info:
    # identifier -> iterable of (atom_idx, radius)
    AllChem.GetMorganFingerprint(mol, radius, bitInfo=bit_info)

    # {atom index: {radius: identifier or None}}
    by_atom = {atom.GetIdx(): dict.fromkeys(radii) for atom in mol.GetAtoms()}
    for identifier, environments in bit_info.items():
        for atom_idx, env_radius in environments:
            by_atom[atom_idx][env_radius] = identifier

    # atom 0 radius 0, atom 0 radius 1, ..., atom 1 radius 0, ...
    return [
        str(slots[r])
        for slots in by_atom.values()
        for r in radii
        if slots[r]
    ]
Example #25
Source File: helpers.py From mol2vec with BSD 3-Clause "New" or "Revised" License | 5 votes |
def depict_identifier(mol, identifier, radius, useFeatures=False, **kwargs):
    """Depict an identifier of the Morgan fingerprint on a molecule.

    If the identifier is not present in the molecule's fingerprint, the
    result of ``mol_to_svg(mol, **kwargs)`` is returned instead.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule
    identifier : int or str
        Feature identifier from Morgan fingerprint
    radius : int
        Radius of Morgan FP
    useFeatures : bool
        Use feature-based Morgan FP

    Returns
    -------
    IPython.display.SVG
    """
    identifier = int(identifier)

    bit_info = {}
    # called for its side effect of populating bit_info
    AllChem.GetMorganFingerprint(mol, radius, bitInfo=bit_info, useFeatures=useFeatures)

    if identifier not in bit_info:
        return mol_to_svg(mol, **kwargs)

    # split the (atom, radius) pairs into two parallel tuples
    atoms, radii = zip(*bit_info[identifier])
    return depict_atoms(mol, atoms, radii, **kwargs)
Example #26
Source File: scscore.py From ASKCOS with Mozilla Public License 2.0 | 5 votes |
def load_model(self, FP_len=1024, model_tag='1024bool'):
    """Load a pickled SCScore model and install the matching ``mol_to_fp``.

    :param FP_len: length of the folded fingerprint vectors.
    :param model_tag: one of ``'1024bool'``, ``'1024uint8'`` or
        ``'2048bool'``; any other value is logged and replaced by
        ``'1024bool'``.

    Sets ``self.FP_len``, ``self.vars``, ``self.mol_to_fp``, ``self.pricer``
    and the ``_restored`` / ``_loaded`` flags.  ``self.FP_rad`` is read by
    the installed ``mol_to_fp`` and must be set elsewhere.
    """
    self.FP_len = FP_len
    if model_tag not in ('1024bool', '1024uint8', '2048bool'):
        MyLogger.print_and_log(
            'Non-existent SCScore model requested: {}. Using "1024bool" model'.format(model_tag),
            scscore_prioritizer_loc, level=2)
        model_tag = '1024bool'
    filename = 'trained_model_path_'+model_tag
    model_path = gc.SCScore_Prioritiaztion[filename]

    # NOTE(security): pickle.load can execute arbitrary code; the model
    # file must come from a trusted location.
    with open(model_path, 'rb') as fid:
        self.vars = pickle.load(fid)
    if gc.DEBUG:
        MyLogger.print_and_log('Loaded synthetic complexity score prioritization model from {}'.format(
            model_path), scscore_prioritizer_loc)

    if 'uint8' in model_path:
        def mol_to_fp(mol):
            if mol is None:
                # Fixed: was np.array((self.FP_len,), ...), which builds a
                # one-element array holding the length, not a zero vector.
                return np.zeros((self.FP_len,), dtype=np.uint8)
            fp = AllChem.GetMorganFingerprint(
                mol, self.FP_rad, useChirality=True)  # uitnsparsevect
            fp_folded = np.zeros((self.FP_len,), dtype=np.uint8)
            # fold the sparse identifiers into FP_len columns, summing collisions
            for k, v in fp.GetNonzeroElements().items():
                fp_folded[k % self.FP_len] += v
            return np.array(fp_folded)
    else:
        def mol_to_fp(mol):
            if mol is None:
                return np.zeros((self.FP_len,), dtype=np.float32)
            # builtin bool replaces the np.bool alias removed in NumPy 1.24
            return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, self.FP_rad, nBits=self.FP_len,
                                                                  useChirality=True), dtype=bool)
    self.mol_to_fp = mol_to_fp

    self.pricer = Pricer()
    self.pricer.load()
    self._restored = True
    self._loaded = True
Example #27
Source File: vectorizers.py From Deep-Drug-Coder with MIT License | 5 votes |
def fit(self, mols):
    """Collect the Morgan identifiers seen in ``mols`` and build the key index.

    Stores the sorted unique identifiers as the numpy array ``self.keys``
    and their count as ``self.dims``.
    """
    seen = set()
    for mol in mols:
        nonzero = AllChem.GetMorganFingerprint(mol, self.radius).GetNonzeroElements()
        seen.update(nonzero.keys())

    self.keys = np.array(sorted(seen))
    self.dims = len(self.keys)
Example #28
Source File: scoring_functions.py From REINVENT with MIT License | 5 votes |
def fingerprints_from_mol(cls, mol):
    """Fold a radius-3 feature Morgan fingerprint of ``mol`` into a (1, 2048) int32 array.

    The value of each nonzero identifier is added at column
    ``identifier % 2048``, so colliding identifiers accumulate.
    """
    size = 2048
    nonzero = AllChem.GetMorganFingerprint(
        mol, 3, useCounts=True, useFeatures=True).GetNonzeroElements()
    folded = np.zeros((1, size), np.int32)
    for identifier, count in nonzero.items():
        folded[0, identifier % size] += int(count)
    return folded
Example #29
Source File: scoring_functions.py From REINVENT with MIT License | 5 votes |
def __call__(self, smile):
    """Score ``smile`` by its Tanimoto similarity to ``self.query_fp``.

    The similarity is capped at ``self.k`` and divided by ``self.k``.
    A SMILES string that yields no molecule scores 0.0.
    """
    mol = Chem.MolFromSmiles(smile)
    if not mol:
        return 0.0

    candidate_fp = AllChem.GetMorganFingerprint(mol, 2, useCounts=True, useFeatures=True)
    similarity = DataStructs.TanimotoSimilarity(self.query_fp, candidate_fp)
    capped = min(similarity, self.k) / self.k
    return float(capped)
Example #30
Source File: scoring_functions.py From REINVENT with MIT License | 5 votes |
def __init__(self):
    """Precompute the query fingerprint from ``self.query_structure``.

    The SMILES in ``self.query_structure`` is parsed and its radius-2
    feature Morgan fingerprint is stored as ``self.query_fp``.
    """
    self.query_fp = AllChem.GetMorganFingerprint(
        Chem.MolFromSmiles(self.query_structure),
        2,
        useCounts=True,
        useFeatures=True,
    )